Ejemplo n.º 1
0
    def post_filter_contaminants(self):
        """Run the contaminant post-filter and record per-category lengths.

        Calls ``contaminants.post_filter`` with the 'accepted_hits' and
        'genes' paths followed by the 'clean_bam', 'more_rRNA_bam',
        'tRNA_bam' and 'other_ncRNA_bam' paths. Afterwards, length counts
        are read from each of those BAM files (and from 'rRNA_bam'), passed
        through ``self.zero_padded_array`` and written to the 'lengths' file
        under the keys 'clean', 'tRNA', 'other_ncRNA' and 'rRNA'.

        NOTE(review): presumably the four BAM paths handed to post_filter
        are its outputs and 'rRNA_bam' comes from an earlier step -- confirm.
        """
        paths = self.file_names

        contaminants.post_filter(
            paths['accepted_hits'],
            paths['genes'],
            paths['clean_bam'],
            paths['more_rRNA_bam'],
            paths['tRNA_bam'],
            paths['other_ncRNA_bam'],
        )

        def padded_lengths(first_key, *extra_keys):
            # Accumulate the length counts of every named BAM file, then
            # hand the total to zero_padded_array.
            totals = sam.get_length_counts(paths[first_key])
            for key in extra_keys:
                totals += sam.get_length_counts(paths[key])
            return self.zero_padded_array(totals)

        tRNA = padded_lengths('tRNA_bam')
        other_ncRNA = padded_lengths('other_ncRNA_bam')
        # rRNA counts are accumulated from two BAM files.
        rRNA = padded_lengths('rRNA_bam', 'more_rRNA_bam')
        clean = padded_lengths('clean_bam')

        lengths = {
            'clean': clean,
            'tRNA': tRNA,
            'other_ncRNA': other_ncRNA,
            'rRNA': rRNA,
        }
        self.write_file('lengths', lengths)
Ejemplo n.º 2
0
    def filter_phiX(self, reads):
        """Yield the reads returned by the phiX pre-filter, then log lengths.

        ``reads`` is passed to ``contaminants.pre_filter`` together with the
        phiX bowtie2 index prefix and the 'phiX_bam' path; every item that
        pre_filter produces is yielded unchanged.

        This is a generator: the length counts of 'phiX_bam' are read and
        written to the 'lengths' file (key 'phiX') only after the filtered
        reads have been completely consumed. A caller that stops iterating
        early never triggers that write.

        NOTE(review): presumably pre_filter writes the reads that match phiX
        to 'phiX_bam' and yields the rest -- confirm against contaminants.
        """
        index_prefix = self.phiX_bowtie2_index_prefix
        phiX_bam = self.file_names['phiX_bam']

        survivors = contaminants.pre_filter(index_prefix, reads, phiX_bam)
        for survivor in survivors:
            yield survivor

        # Reached only once the stream above is exhausted.
        counts = sam.get_length_counts(phiX_bam)
        padded = self.zero_padded_array(counts)
        self.write_file('lengths', {'phiX': padded})
Ejemplo n.º 3
0
    def find_unambiguous_lengths(self):
        """Write the 'unambiguous' entry of the 'lengths' file.

        For any adapter type other than 'polyA', an all-zero integer array of
        size ``self.max_read_length + 1`` is written. For 'polyA',
        ``trim.unambiguously_trimmed`` is called with the 'clean_bam',
        'unambiguous_bam' and 'genome' paths, and the length counts of
        'unambiguous_bam' (passed through ``self.zero_padded_array``) are
        written instead.
        """
        if self.adapter_type != 'polyA':
            # A placeholder must still be written so that there is
            # something to merge.
            lengths = np.zeros(self.max_read_length + 1, dtype=int)
        else:
            clean_bam = self.file_names['clean_bam']
            unambiguous_bam = self.file_names['unambiguous_bam']
            genome = self.file_names['genome']
            trim.unambiguously_trimmed(clean_bam, unambiguous_bam, genome)

            counts = sam.get_length_counts(self.file_names['unambiguous_bam'])
            lengths = self.zero_padded_array(counts)

        self.write_file('lengths', {'unambiguous': lengths})