예제 #1
0
    def process_remapped_unmapped(self):
        """Summarise the reads that were still unmapped after remapping.

        Reads are taken from the 'remapped_unmapped_bam' file via
        sam.bam_to_fastq, passed through trim.untrim_reads (with
        second_time=True) and then through self.filter_synthetic_sequences.
        Each surviving read is tallied by length and by sequence, in one of
        two groups depending on whether predominantly_A(read.seq) is true.

        Three outputs are written with self.write_file:
          'unmapped_composition' - first value returned by
              composition.length_stratified_composition.
          'lengths'              - per-length read counts for both groups.
          'common_unmapped'      - the 100 most common sequences per group.
        """
        histogram_size = self.max_read_length + 1

        other_length_hist = np.zeros(histogram_size, int)
        other_seq_tally = Counter()

        polyA_length_hist = np.zeros(histogram_size, int)
        polyA_seq_tally = Counter()

        bam_path = self.file_names['remapped_unmapped_bam']
        fastq_reads = sam.bam_to_fastq(bam_path)
        untrimmed = trim.untrim_reads(fastq_reads, second_time=True)
        filtered = self.filter_synthetic_sequences(untrimmed)

        def tally_and_pair(reads):
            # Update the tallies as a side effect while streaming
            # (sequence, False) pairs on to the composition calculation.
            for read in reads:
                if predominantly_A(read.seq):
                    length_hist, seq_tally = polyA_length_hist, polyA_seq_tally
                else:
                    length_hist, seq_tally = other_length_hist, other_seq_tally

                length_hist[len(read.seq)] += 1
                seq_tally[read.seq] += 1
                yield (read.seq, False)

        # The tallies are only filled in as this generator is consumed, so
        # they must not be written out before the composition call returns.
        composition_array, _ = composition.length_stratified_composition(
            tally_and_pair(filtered),
            self.max_read_length,
        )
        self.write_file('unmapped_composition', composition_array)

        length_histograms = {
            'unmapped': other_length_hist,
            'long_polyA': polyA_length_hist,
        }
        self.write_file('lengths', length_histograms)

        # Keep only the 100 most frequent sequences from each group.
        top_other = Counter(dict(other_seq_tally.most_common(100)))
        top_polyA = Counter(dict(polyA_seq_tally.most_common(100)))

        most_common_seqs = {
            'non_long_polyA': top_other,
            'long_polyA': top_polyA,
        }
        self.write_file('common_unmapped', most_common_seqs)
예제 #2
0
 def process_initially_unmapped(self):
     """Send the initially unmapped reads on to poly-A-trimmed remapping.

     Reads are taken from the 'unmapped_bam' file via sam.bam_to_fastq,
     passed through self.filter_phiX, and the result is handed to
     self.remap_polyA_trimmed.
     """
     bam_path = self.file_names['unmapped_bam']
     fastq_reads = sam.bam_to_fastq(bam_path)
     phiX_filtered = self.filter_phiX(fastq_reads)
     self.remap_polyA_trimmed(phiX_filtered)