self.write_file('metacodon_counts', metacodon_counts) read_positions = self.load_read_positions() metanucleotide_counts = positions.compute_metanucleotide_counts(read_positions) self.write_file('metanucleotide_counts', metanucleotide_counts) def compute_mean_densities(self): codon_counts = self.read_file('buffered_codon_counts', merged=True) mean_densities = positions.compute_averaged_codon_densities(codon_counts) self.write_file('mean_densities', mean_densities) mean_densities_anisomycin = positions.compute_averaged_codon_densities(codon_counts, offset_key='anisomycin') self.write_file('mean_densities_anisomycin', mean_densities_anisomycin) if self.possibly_misannotated_file_name != None: possibly_misannotated_names = {line.strip() for line in open(self.possibly_misannotated_file_name)} mean_densities = positions.compute_averaged_codon_densities(codon_counts, possibly_misannotated_names) self.write_file('mean_densities_no_misannotated', mean_densities) def compute_RPKMs(self, exclude_from_start=0, exclude_from_end=0): gene_infos = self.read_file('read_counts', merged=True) RPKMs = positions.compute_RPKMs(gene_infos, 0, 0) self.write_file('RPKMs', RPKMs) gene_infos = self.read_file('read_counts_exclude_edges', merged=True) RPKMs_exclude_edges = positions.compute_RPKMs(gene_infos, 30, 4) self.write_file('RPKMs_exclude_edges', RPKMs_exclude_edges) if __name__ == '__main__': script_path = os.path.realpath(__file__) map_reduce.controller(RibosomeProfilingExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# dict literal closed by the first `}` (and the loop binding `name`/`gene`)
# is opened outside this chunk.
            }
            processed_read_positions[name] = gene

        metagene_positions = positions.compute_metagene_positions(
            piece_CDSs,
            processed_read_positions,
            max_gene_length,
        )
        self.write_file('metagene_positions', metagene_positions)

    def plot_starts_and_ends(self):
        # Render metagene profiles around starts/ends from the precomputed
        # 'metagene_positions' file, for the two three-prime key sets.
        metagene_positions = self.read_file('metagene_positions')

        visualize.plot_metagene_positions(
            metagene_positions,
            self.figure_file_names['starts_and_ends'],
            ['three_prime_genomic', 'three_prime_nongenomic'],
        )

    def get_total_eligible_reads(self):
        # Total mapped reads = 'Nonunique' + 'Unique' counts from the summary
        # file (each summary value is a (name, values) pair; values[0] is the count).
        summary_pairs = self.read_file('summary')
        summary_dict = {name: values[0] for name, values in summary_pairs}
        total_mapped_reads = summary_dict['Nonunique'] + summary_dict['Unique']
        return total_mapped_reads

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(ThreePExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# enclosing generator's `def` and the loop over (R1, R2) read pairs — which
# bind `position`, `R1`, `R2`, and the counters — are outside this chunk.
            if position != None:  # NOTE(review): prefer `is not None`
                trimmed_lengths[position] += 1
                # Too little payload left after the barcode: drop the pair.
                if position - len(self.barcode) < 12:
                    continue
            else:
                # No trim position found: keep the whole read.
                position = len(R1.seq)

            long_enough_reads += 1

            # Strip the barcode prefix and anything at/after the trim position.
            payload_slice = slice(len(self.barcode), position)
            processed_R1 = fastq.Read(R1.name, R1.seq[payload_slice], R1.qual[payload_slice])
            processed_R2 = fastq.make_record(R2.name, R2.seq[payload_slice], R2.qual[payload_slice])
            yield processed_R1, processed_R2

        trimmed_lengths = utilities.counts_to_array(trimmed_lengths)
        self.write_file('trimmed_lengths', trimmed_lengths)
        self.write_file('barcodes', barcodes)

        self.summary.extend([
            ('Total read pairs', total_reads),
            ('Long enough', long_enough_reads),
        ])

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(WilkeningRNASeqExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# loop over mapping groups (binding `group`, the counters, and
# `alignment_sorter`) is outside this chunk.
            unmapped = any(m.is_unmapped for m in group)
            if unmapped:
                num_unmapped += 1
                continue

            # A group counts as nonunique if it has multiple mappings or any
            # mapping with MAPQ below 40.
            nonunique = len(group) > 1 or any(m.mapq < 40 for m in group)
            if nonunique:
                num_nonunique += 1
            else:
                num_unique += 1
                # NOTE(review): write loop placed in the unique branch — the
                # mangled source makes its indentation ambiguous; confirm
                # against the original file.
                for mapping in group:
                    alignment_sorter.write(mapping)

        self.summary.extend(
            [('Unmapped', num_unmapped),
             ('Nonunique', num_nonunique),
             ('Unique', num_unique),
            ],
        )

    def get_total_eligible_reads(self):
        # Eligible reads = 'Nonunique' + 'Unique' counts from the summary file.
        summary_pairs = self.read_file('summary')
        summary_dict = {name: values[0] for name, values in summary_pairs}
        total_mapped_reads = summary_dict['Nonunique'] + summary_dict['Unique']
        return total_mapped_reads

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(TLSeqExperiment, script_path)
def plot_polyA_lengths(self): fig, ax = plt.subplots() array = self.read_file('polyA_lengths') ax.plot(array, '.-', label='polyA_lengths') ax.legend(loc='upper right', framealpha=0.5) fig.savefig(self.figure_file_names['polyA_lengths']) def get_joint_position_counts(self, gene_name): CDSs, _ = self.get_CDSs() CDS_dict = {t.name: t for t in CDSs} transcript = CDS_dict[gene_name] joint_position_counts = positions.get_joint_position_counts_sparse(self.file_names['combined_extended_sorted'], transcript, ) return joint_position_counts, transcript def get_total_eligible_reads(self): summary_pairs = self.read_file('summary') summary_dict = {name: values[0] for name, values in summary_pairs} total_mapped_reads = summary_dict['Nonunique'] + summary_dict['Concordant'] return total_mapped_reads if __name__ == '__main__': script_path = os.path.realpath(__file__) map_reduce.controller(TIFSeqExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# opening of the per-gene dict literal (and the loop binding `name`, `gene`,
# `three_prime_counts`) lies outside this chunk.
                'three_prime_nongenomic': read_positions[name]['all'] - read_positions[name][0],
                'three_prime_nonunique': three_prime_counts['all_nonunique'],
                'sequence': read_positions[name]['sequence'],
            }
            processed_read_positions[name] = gene

        metagene_positions = positions.compute_metagene_positions(
            piece_CDSs,
            processed_read_positions,
            max_gene_length,
        )
        self.write_file('metagene_positions', metagene_positions)

    def plot_starts_and_ends(self):
        # Plot metagene profiles around starts/ends using the stored
        # 'metagene_positions' data.
        metagene_positions = self.read_file('metagene_positions')
        visualize.plot_metagene_positions(
            metagene_positions,
            self.figure_file_names['starts_and_ends'],
            ['three_prime_genomic', 'three_prime_nongenomic'],
        )

    def get_total_eligible_reads(self):
        # Sum of the 'Nonunique' and 'Unique' counts from the summary file.
        summary_pairs = self.read_file('summary')
        summary_dict = {name: values[0] for name, values in summary_pairs}
        total_mapped_reads = summary_dict['Nonunique'] + summary_dict['Unique']
        return total_mapped_reads

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(ThreePExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# leading `3,` / `)` close a call whose opening — and the enclosing generator
# loop binding `position`, `R1`, `R2`, and the counters — lie outside this chunk.
                3,
            )
            if position != None:  # NOTE(review): prefer `is not None`
                trimmed_lengths[position] += 1
                # Too little payload left after the barcode: drop the pair.
                if position - len(self.barcode) < 12:
                    continue
            else:
                # No trim position found: keep the whole read.
                position = len(R1.seq)

            long_enough_reads += 1

            # Strip the barcode prefix and anything at/after the trim position.
            payload_slice = slice(len(self.barcode), position)
            processed_R1 = fastq.Read(R1.name, R1.seq[payload_slice], R1.qual[payload_slice])
            processed_R2 = fastq.make_record(R2.name, R2.seq[payload_slice], R2.qual[payload_slice])
            yield processed_R1, processed_R2

        trimmed_lengths = utilities.counts_to_array(trimmed_lengths)
        self.write_file('trimmed_lengths', trimmed_lengths)
        self.write_file('barcodes', barcodes)

        self.summary.extend(
            [('Total read pairs', total_reads),
             ('Long enough', long_enough_reads),
            ]
        )

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(WilkeningRNASeqExperiment, script_path)
# nonunqiue. for transcript_name, transcript_position in genomic_to_all_transcripts[ full_mapped_position]: uniqueness[transcript_name][self.fragment_length][ 'start_codon', transcript_position] = 2 else: # Check that any read with a MAPQ of 50 is to the expected position. full_true_position = ( true_transcript.seqname, true_transcript.strand, true_transcript.transcript_to_genomic[true_position], ) if read.mapq == 50 and (full_mapped_position != full_true_position): raise ValueError(full_mapped_position, full_true_position) # As long as this hasn't been mapped to by some other fragment, # mark it as unique. if uniqueness[true_transcript.name][self.fragment_length][ 'start_codon', true_position] == 0: uniqueness[true_transcript.name][self.fragment_length][ 'start_codon', true_position] = 1 self.write_file('uniqueness', uniqueness) if __name__ == '__main__': script_path = os.path.realpath(__file__) map_reduce.controller(MappabilityExperiment, script_path)
# NOTE(review): whitespace-mangled chunk, reformatted; tokens unchanged. The
# per-read loop binding `read`, `strand`, `five_prime`, `true_transcript`, and
# `true_position` lies outside this chunk.
            # Genomic location this read actually mapped to.
            full_mapped_position = (bam_file.getrname(read.tid), strand, five_prime)

            if read.mapq < 50:
                # Flag the true source of the read as nonunique.
                uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] = 2
                # Hopefully redundantly, flag the position actually mapped to
                # as nonunique. (typo "nonunqiue" fixed)
                for transcript_name, transcript_position in genomic_to_all_transcripts[full_mapped_position]:
                    uniqueness[transcript_name][self.fragment_length]['start_codon', transcript_position] = 2
            else:
                # Check that any read with a MAPQ of 50 is to the expected position.
                full_true_position = (true_transcript.seqname,
                                      true_transcript.strand,
                                      true_transcript.transcript_to_genomic[true_position],
                                     )
                if read.mapq == 50 and (full_mapped_position != full_true_position):
                    raise ValueError(full_mapped_position, full_true_position)

                # As long as this hasn't been mapped to by some other fragment,
                # mark it as unique.
                if uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] == 0:
                    uniqueness[true_transcript.name][self.fragment_length]['start_codon', true_position] = 1

        self.write_file('uniqueness', uniqueness)

if __name__ == '__main__':
    script_path = os.path.realpath(__file__)
    map_reduce.controller(MappabilityExperiment, script_path)