def post_filter_contaminants(self):
    """Run the contaminant post-filter and record per-category length counts.

    Calls ``contaminants.post_filter`` on the 'accepted_hits', 'genes',
    'clean_bam', 'more_rRNA_bam', 'tRNA_bam' and 'other_ncRNA_bam' file
    names, then gathers length counts from the tRNA, other ncRNA, rRNA and
    clean BAM files and writes them under the 'lengths' key via
    ``self.write_file``.
    """
    fns = self.file_names

    # NOTE(review): presumably post_filter reads the first two files and
    # produces the four BAM files that follow -- confirm against
    # contaminants.post_filter.
    contaminants.post_filter(
        fns['accepted_hits'],
        fns['genes'],
        fns['clean_bam'],
        fns['more_rRNA_bam'],
        fns['tRNA_bam'],
        fns['other_ncRNA_bam'],
    )

    tRNA_counts = sam.get_length_counts(fns['tRNA_bam'])
    tRNA_padded = self.zero_padded_array(tRNA_counts)

    ncRNA_counts = sam.get_length_counts(fns['other_ncRNA_bam'])
    ncRNA_padded = self.zero_padded_array(ncRNA_counts)

    # rRNA counts are the combination of two files: 'rRNA_bam' (not written
    # by the post_filter call above) and 'more_rRNA_bam'.
    combined_rRNA_counts = sam.get_length_counts(fns['rRNA_bam'])
    combined_rRNA_counts += sam.get_length_counts(fns['more_rRNA_bam'])
    rRNA_padded = self.zero_padded_array(combined_rRNA_counts)

    clean_counts = sam.get_length_counts(fns['clean_bam'])
    clean_padded = self.zero_padded_array(clean_counts)

    summary = {
        'clean': clean_padded,
        'tRNA': tRNA_padded,
        'other_ncRNA': ncRNA_padded,
        'rRNA': rRNA_padded,
    }
    self.write_file('lengths', summary)
def filter_phiX(self, reads):
    """Yield the reads that come out of the phiX pre-filter.

    This is a generator. ``reads`` is handed to ``contaminants.pre_filter``
    together with the phiX bowtie2 index prefix and the 'phiX_bam' file
    name; every item that call produces is yielded unchanged.

    Only after the filtered reads have been exhausted are the length counts
    of 'phiX_bam' computed and written under the 'lengths' key, so a
    consumer that stops iterating early never triggers that write.
    """
    index_prefix = self.phiX_bowtie2_index_prefix
    phiX_bam_fn = self.file_names['phiX_bam']

    # NOTE(review): presumably pre_filter writes the phiX-matching reads to
    # phiX_bam_fn as it is consumed -- confirm against contaminants.pre_filter.
    surviving_reads = contaminants.pre_filter(index_prefix, reads, phiX_bam_fn)
    for surviving_read in surviving_reads:
        yield surviving_read

    # Reached only once the upstream iterator is fully consumed.
    padded_lengths = self.zero_padded_array(sam.get_length_counts(phiX_bam_fn))
    self.write_file('lengths', {'phiX': padded_lengths})
def find_unambiguous_lengths(self):
    """Write the 'unambiguous' length counts under the 'lengths' key.

    When ``self.adapter_type`` is 'polyA', ``trim.unambiguously_trimmed`` is
    run on the 'clean_bam', 'unambiguous_bam' and 'genome' file names and
    the length counts of 'unambiguous_bam' are zero-padded and written.

    For any other adapter type an all-zero integer array of length
    ``self.max_read_length + 1`` is written instead.
    """
    if self.adapter_type != 'polyA':
        # Need to write the file so that there is something to merge.
        lengths = np.zeros(self.max_read_length + 1, int)
    else:
        clean_fn = self.file_names['clean_bam']
        unambiguous_fn = self.file_names['unambiguous_bam']
        genome_fn = self.file_names['genome']
        trim.unambiguously_trimmed(clean_fn, unambiguous_fn, genome_fn)

        counts = sam.get_length_counts(unambiguous_fn)
        lengths = self.zero_padded_array(counts)

    self.write_file('lengths', {'unambiguous': lengths})