Ejemplo n.º 1
0
        self.write_file('metacodon_counts', metacodon_counts)

        read_positions = self.load_read_positions()
        metanucleotide_counts = positions.compute_metanucleotide_counts(read_positions)
        self.write_file('metanucleotide_counts', metanucleotide_counts)

    def compute_mean_densities(self):
        """Compute averaged codon densities and write them to output files.

        Reads the merged 'buffered_codon_counts' file and writes:

        - 'mean_densities': result of
          positions.compute_averaged_codon_densities with default arguments.
        - 'mean_densities_anisomycin': the same computation with
          offset_key='anisomycin'.
        - 'mean_densities_no_misannotated': only when
          self.possibly_misannotated_file_name is not None; the set of names
          read from that file is passed as the second positional argument
          (presumably so those genes are left out - confirm against
          positions.compute_averaged_codon_densities).
        """
        codon_counts = self.read_file('buffered_codon_counts', merged=True)
        mean_densities = positions.compute_averaged_codon_densities(codon_counts)
        self.write_file('mean_densities', mean_densities)
        mean_densities_anisomycin = positions.compute_averaged_codon_densities(codon_counts, offset_key='anisomycin')
        self.write_file('mean_densities_anisomycin', mean_densities_anisomycin)

        if self.possibly_misannotated_file_name is not None:
            # One name per line, with surrounding whitespace stripped. The
            # context manager closes the file as soon as the names are read
            # instead of leaving the handle to the garbage collector.
            with open(self.possibly_misannotated_file_name) as names_file:
                possibly_misannotated_names = {line.strip() for line in names_file}
            mean_densities = positions.compute_averaged_codon_densities(codon_counts, possibly_misannotated_names)
            self.write_file('mean_densities_no_misannotated', mean_densities)
        
    def compute_RPKMs(self, exclude_from_start=0, exclude_from_end=0):
        """Compute two sets of RPKMs and write each to its own output file.

        For each (input, output) pair below, the merged input file is read,
        handed to positions.compute_RPKMs together with two fixed integer
        arguments, and the result is written out:

        - 'read_counts' -> 'RPKMs', with arguments (0, 0)
        - 'read_counts_exclude_edges' -> 'RPKMs_exclude_edges', with (30, 4)

        NOTE: exclude_from_start and exclude_from_end are accepted but never
        consulted by this method; the values passed on are hard-coded.
        """
        jobs = (
            ('read_counts', 'RPKMs', 0, 0),
            ('read_counts_exclude_edges', 'RPKMs_exclude_edges', 30, 4),
        )
        for source_key, output_key, first_margin, second_margin in jobs:
            counts = self.read_file(source_key, merged=True)
            values = positions.compute_RPKMs(counts, first_margin, second_margin)
            self.write_file(output_key, values)

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        RibosomeProfilingExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 2
0
            }
            processed_read_positions[name] = gene

        metagene_positions = positions.compute_metagene_positions(
            piece_CDSs,
            processed_read_positions,
            max_gene_length,
        )

        self.write_file('metagene_positions', metagene_positions)

    def plot_starts_and_ends(self):
        """Plot the stored metagene positions to the 'starts_and_ends' figure.

        Reads the 'metagene_positions' file and passes it, the figure file
        name registered under 'starts_and_ends', and the two three-prime
        keys to visualize.plot_metagene_positions.
        """
        keys_to_plot = ['three_prime_genomic', 'three_prime_nongenomic']
        stored_positions = self.read_file('metagene_positions')
        figure_file_name = self.figure_file_names['starts_and_ends']
        visualize.plot_metagene_positions(stored_positions, figure_file_name, keys_to_plot)

    def get_total_eligible_reads(self):
        """Return the 'Nonunique' plus 'Unique' counts from the summary file.

        The 'summary' file is read as (name, values) pairs; the first element
        of each values sequence is taken as that name's count. Raises
        KeyError if either name is missing.
        """
        first_value_by_name = {}
        for label, recorded in self.read_file('summary'):
            first_value_by_name[label] = recorded[0]
        return first_value_by_name['Nonunique'] + first_value_by_name['Unique']


if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        ThreePExperiment,
        os.path.realpath(__file__),
    )
            if position != None:
                trimmed_lengths[position] += 1
                if position - len(self.barcode) < 12:
                    continue
            else:
                position = len(R1.seq)

            long_enough_reads += 1

            payload_slice = slice(len(self.barcode), position)

            processed_R1 = fastq.Read(R1.name, R1.seq[payload_slice],
                                      R1.qual[payload_slice])
            processed_R2 = fastq.make_record(R2.name, R2.seq[payload_slice],
                                             R2.qual[payload_slice])

            yield processed_R1, processed_R2

        trimmed_lengths = utilities.counts_to_array(trimmed_lengths)
        self.write_file('trimmed_lengths', trimmed_lengths)
        self.write_file('barcodes', barcodes)
        self.summary.extend([
            ('Total read pairs', total_reads),
            ('Long enough', long_enough_reads),
        ])


if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        WilkeningRNASeqExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 4
0
                unmapped = any(m.is_unmapped for m in group)
                if unmapped:
                    num_unmapped += 1
                    continue

                nonunique = len(group) > 1 or any(m.mapq < 40 for m in group)
                if nonunique:
                    num_nonunique += 1
                else:
                    num_unique += 1

                for mapping in group:
                    alignment_sorter.write(mapping)
            
        self.summary.extend(
            [('Unmapped', num_unmapped),
             ('Nonunique', num_nonunique),
             ('Unique', num_unique),
            ],
        )
    
    def get_total_eligible_reads(self):
        """Return the number of mapped reads recorded in the summary file.

        Each entry of the 'summary' file is a (name, values) pair and only
        values[0] is used. The result is the 'Nonunique' entry plus the
        'Unique' entry; a missing entry raises KeyError.
        """
        counts = dict(
            (label, recorded[0]) for label, recorded in self.read_file('summary')
        )
        nonunique = counts['Nonunique']
        unique = counts['Unique']
        return nonunique + unique

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        TLSeqExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 5
0
    def plot_polyA_lengths(self):
        """Plot the contents of the 'polyA_lengths' file and save the figure.

        The array is drawn as a single '.-' series labelled 'polyA_lengths'
        with a semi-transparent legend in the upper right, and the figure is
        saved to the file name registered under 'polyA_lengths'.
        """
        figure, axes = plt.subplots()

        lengths = self.read_file('polyA_lengths')
        axes.plot(lengths, '.-', label='polyA_lengths')
        axes.legend(framealpha=0.5, loc='upper right')

        output_file_name = self.figure_file_names['polyA_lengths']
        figure.savefig(output_file_name)


    def get_joint_position_counts(self, gene_name):
        """Return sparse joint position counts for one gene and its transcript.

        The first element returned by self.get_CDSs() is indexed by each
        object's .name, and the one named gene_name is selected. That
        transcript and the 'combined_extended_sorted' file name are passed to
        positions.get_joint_position_counts_sparse.

        Returns a (joint_position_counts, transcript) tuple. Raises KeyError
        when no CDS is named gene_name.
        """
        all_CDSs, _ = self.get_CDSs()

        transcripts_by_name = {}
        for candidate in all_CDSs:
            transcripts_by_name[candidate.name] = candidate
        transcript = transcripts_by_name[gene_name]

        counts = positions.get_joint_position_counts_sparse(
            self.file_names['combined_extended_sorted'],
            transcript,
        )
        return counts, transcript

    def get_total_eligible_reads(self):
        """Return the 'Nonunique' plus 'Concordant' counts from the summary file.

        The 'summary' file is read as (name, values) pairs and the first
        element of each values sequence is used as the count. Raises KeyError
        if either name is absent.
        """
        count_by_name = {}
        for entry_name, entry_values in self.read_file('summary'):
            count_by_name[entry_name] = entry_values[0]

        nonunique = count_by_name['Nonunique']
        concordant = count_by_name['Concordant']
        return nonunique + concordant

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        TIFSeqExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 6
0
                    'three_prime_nongenomic': read_positions[name]['all'] - read_positions[name][0],
                    'three_prime_nonunique': three_prime_counts['all_nonunique'],
                    'sequence': read_positions[name]['sequence'],
                   }
            processed_read_positions[name] = gene
    
        metagene_positions = positions.compute_metagene_positions(piece_CDSs,
                                                                  processed_read_positions,
                                                                  max_gene_length,
                                                                 )

        self.write_file('metagene_positions', metagene_positions)
    
    def plot_starts_and_ends(self):
        """Draw the 'starts_and_ends' figure from the stored metagene positions.

        Loads 'metagene_positions' and calls
        visualize.plot_metagene_positions with it, the figure file name
        registered under 'starts_and_ends', and the keys
        'three_prime_genomic' and 'three_prime_nongenomic'.
        """
        loaded_positions = self.read_file('metagene_positions')
        target_file_name = self.figure_file_names['starts_and_ends']
        selected_keys = ['three_prime_genomic', 'three_prime_nongenomic']

        visualize.plot_metagene_positions(
            loaded_positions,
            target_file_name,
            selected_keys,
        )

    def get_total_eligible_reads(self):
        """Return the total of the 'Nonunique' and 'Unique' summary counts.

        Entries of the 'summary' file are (name, values) pairs; values[0] is
        the count used for each name. A missing name raises KeyError.
        """
        entries = self.read_file('summary')
        leading_value = {label: recorded[0] for label, recorded in entries}
        nonunique, unique = (leading_value[key] for key in ('Nonunique', 'Unique'))
        return nonunique + unique

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        ThreePExperiment,
        os.path.realpath(__file__),
    )
                                                               3,
                                                              )
            if position != None:
                trimmed_lengths[position] += 1
                if position - len(self.barcode) < 12:
                    continue
            else:
                position = len(R1.seq)

            long_enough_reads += 1

            payload_slice = slice(len(self.barcode), position)

            processed_R1 = fastq.Read(R1.name, R1.seq[payload_slice], R1.qual[payload_slice])
            processed_R2 = fastq.make_record(R2.name, R2.seq[payload_slice], R2.qual[payload_slice])
            
            yield processed_R1, processed_R2

        trimmed_lengths = utilities.counts_to_array(trimmed_lengths)
        self.write_file('trimmed_lengths', trimmed_lengths)
        self.write_file('barcodes', barcodes)
        self.summary.extend(
            [('Total read pairs', total_reads),
             ('Long enough', long_enough_reads),
            ]
        )

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        WilkeningRNASeqExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 8
0
                # nonunique.
                for transcript_name, transcript_position in genomic_to_all_transcripts[
                        full_mapped_position]:
                    uniqueness[transcript_name][self.fragment_length][
                        'start_codon', transcript_position] = 2
            else:
                # Check that any read with a MAPQ of 50 is to the expected position.
                full_true_position = (
                    true_transcript.seqname,
                    true_transcript.strand,
                    true_transcript.transcript_to_genomic[true_position],
                )

                if read.mapq == 50 and (full_mapped_position !=
                                        full_true_position):
                    raise ValueError(full_mapped_position, full_true_position)

                # As long as this hasn't been mapped to by some other fragment,
                # mark it as unique.
                if uniqueness[true_transcript.name][self.fragment_length][
                        'start_codon', true_position] == 0:
                    uniqueness[true_transcript.name][self.fragment_length][
                        'start_codon', true_position] = 1

        self.write_file('uniqueness', uniqueness)


if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        MappabilityExperiment,
        os.path.realpath(__file__),
    )
Ejemplo n.º 9
0
            full_mapped_position = (bam_file.getrname(read.tid), strand, five_prime)

            if read.mapq < 50:
                # Flag the true source of the read as nonunique.
                uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] = 2
                
                # Hopefully redundantly, flag the position actually mapped to as
                # nonunique.
                for transcript_name, transcript_position in genomic_to_all_transcripts[full_mapped_position]:
                    uniqueness[transcript_name][self.fragment_length]['start_codon', transcript_position] = 2
            else:
                # Check that any read with a MAPQ of 50 is to the expected position.
                full_true_position = (true_transcript.seqname,
                                      true_transcript.strand,
                                      true_transcript.transcript_to_genomic[true_position],
                                     )

                if read.mapq == 50 and (full_mapped_position != full_true_position):
                    raise ValueError(full_mapped_position, full_true_position)
                
                # As long as this hasn't been mapped to by some other fragment,
                # mark it as unique.
                if uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] == 0: 
                    uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] = 1

        self.write_file('uniqueness', uniqueness)

if __name__ == '__main__':
    # Script entry point: give the map_reduce controller the experiment class
    # and the symlink-resolved path of this file.
    map_reduce.controller(
        MappabilityExperiment,
        os.path.realpath(__file__),
    )