Beispiel #1
0
    def test_get_pair_type(self):
        '''Test get_pair_type.

        Reads the records of mapping_test.smalt.out.bam two at a time and
        checks that mapping.get_pair_type returns the expected tuple of
        (first read type, second read type) for each consecutive pair.
        '''
        expected = [(mapping.CAN_EXTEND_LEFT, mapping.KEEP),
                    (mapping.KEEP, mapping.CAN_EXTEND_RIGHT),
                    (mapping.KEEP, mapping.KEEP),
                    (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
                    (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
                    (mapping.BOTH_UNMAPPED, mapping.BOTH_UNMAPPED),
                    (mapping.NOT_USEFUL, mapping.NOT_USEFUL)]

        sam_reader = pysam.Samfile(
            os.path.join(data_dir, 'mapping_test.smalt.out.bam'), "rb")
        # Close the BAM handle even when an assertion fails part-way through,
        # so the test does not leak an open file.
        try:
            previous_sam = None
            i = 0
            for sam in sam_reader.fetch(until_eof=True):
                # Hold on to the first record of a pair until its mate arrives.
                if previous_sam is None:
                    previous_sam = sam
                    continue

                # 190 and 1000 are presumably the reference length and the
                # maximum insert size -- confirm against mapping.get_pair_type.
                types = mapping.get_pair_type(previous_sam,
                                              sam,
                                              190,
                                              1000,
                                              min_clip=2)
                self.assertEqual(types, expected[i])
                i += 1
                # Pair consumed; the next record starts a new pair.
                previous_sam = None
            # NOTE(review): nothing checks that every entry of `expected` was
            # consumed, so a BAM with fewer pairs would still pass.
        finally:
            sam_reader.close()
Beispiel #2
0
    def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
        '''Extend every contig using the read pairs found in a BAM file.

        Records are consumed two at a time (first record, then its mate) and
        classified with mapping.get_pair_type. Reads typed CAN_EXTEND_LEFT or
        CAN_EXTEND_RIGHT contribute a kmer to the matching end of the contig
        they map to. Afterwards each contig's extend() is called and the
        result is recorded in self.contig_lengths.

        bam_in: name of the BAM file to read.
        out_prefix: if not None, reads whose type is to be kept are written
            in FASTA format to out_prefix + '_1.fa' (reads flagged as read 1)
            and out_prefix + '_2.fa' (all others).
        output_all_useful_reads: if True, reads typed BOTH_UNMAPPED are also
            written to the FASTA files.

        Returns the total number of bases added, summed over the left and
        right ends of all contigs.

        If the BAM holds an odd number of records, the final unpaired record
        is ignored.
        '''
        keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
        if output_all_useful_reads:
            keep_read_types.add(mapping.BOTH_UNMAPPED)

        fa_out1 = None
        fa_out2 = None
        sam_reader = None

        # Everything that opens a handle lives inside the try so that the BAM
        # reader and both FASTA files are released even if a step fails.
        try:
            if out_prefix is not None:
                fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
                fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

            sam_reader = pysam.Samfile(bam_in, "rb")
            previous_sam = None

            for current_sam in sam_reader.fetch(until_eof=True):
                # Hold on to the first record of a pair until its mate arrives.
                if previous_sam is None:
                    previous_sam = current_sam
                    continue

                previous_type, current_type = mapping.get_pair_type(previous_sam, current_sam, self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam), self.max_insert, min_clip=self.min_clip)

                for sam, sam_type in [(previous_sam, previous_type), (current_sam, current_type)]:
                    if sam_type == mapping.CAN_EXTEND_LEFT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # First element of soft_clipped() is used as the length
                        # of the read prefix that hangs off the contig's left
                        # end (presumably the left soft-clip size -- confirm).
                        clipped = mapping.soft_clipped(sam)[0]
                        self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                    elif sam_type == mapping.CAN_EXTEND_RIGHT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # Everything after the aligned part of the read.
                        self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                    if out_prefix is not None and sam_type in keep_read_types:
                        if sam.is_read1:
                            print(mapping.sam_to_fasta(sam), file=fa_out1)
                        else:
                            print(mapping.sam_to_fasta(sam), file=fa_out2)

                # Pair consumed; the next record starts a new pair.
                previous_sam = None
        finally:
            if sam_reader is not None:
                sam_reader.close()
            for fa_out in (fa_out1, fa_out2):
                if fa_out is not None:
                    pyfastaq.utils.close(fa_out)

        total_bases_added = 0

        for ctg in self.contigs:
            left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
            if self.verbose:
                print('    extend contig ' +  ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
            self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
            total_bases_added += left_length + right_length

        return total_bases_added
Beispiel #3
0
    def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
        '''Use the read pairs in a BAM file to extend each contig.

        The BAM is read record by record; every two consecutive records are
        treated as a pair and typed by mapping.get_pair_type. A read typed
        CAN_EXTEND_LEFT / CAN_EXTEND_RIGHT adds a left / right kmer to the
        contig named by mapping.get_ref_name. Once all reads are processed,
        extend() is called on every contig and the new length plus the bases
        added on each side are appended to self.contig_lengths.

        bam_in: name of the BAM file to read.
        out_prefix: when not None, kept reads are written as FASTA to
            out_prefix + '_1.fa' (if sam.is_read1) or out_prefix + '_2.fa'.
        output_all_useful_reads: when True, BOTH_UNMAPPED reads are written
            out as well as the CAN_EXTEND_LEFT, CAN_EXTEND_RIGHT and KEEP ones.

        Returns the sum, over all contigs, of bases added to the left and
        right ends.

        A trailing record without a mate (odd record count) is not processed.
        '''
        keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
        if output_all_useful_reads:
            keep_read_types.add(mapping.BOTH_UNMAPPED)

        fa_out1 = None
        fa_out2 = None
        sam_reader = None

        # Open all handles inside the try block so the finally clause can
        # release whichever ones were successfully opened, on any exit path.
        try:
            if out_prefix is not None:
                fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
                fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

            sam_reader = pysam.Samfile(bam_in, "rb")
            previous_sam = None

            for current_sam in sam_reader.fetch(until_eof=True):
                # Buffer the first record of the pair and wait for the second.
                if previous_sam is None:
                    previous_sam = current_sam
                    continue

                previous_type, current_type = mapping.get_pair_type(previous_sam, current_sam, self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam), self.max_insert, min_clip=self.min_clip)

                for sam, sam_type in [(previous_sam, previous_type), (current_sam, current_type)]:
                    if sam_type == mapping.CAN_EXTEND_LEFT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # The leading `clipped` bases of the read are handed to
                        # the contig as a left kmer (soft_clipped()[0] is
                        # presumably the left soft-clip length -- confirm).
                        clipped = mapping.soft_clipped(sam)[0]
                        self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                    elif sam_type == mapping.CAN_EXTEND_RIGHT:
                        name = mapping.get_ref_name(sam, sam_reader)
                        # Bases from sam.qend onwards lie past the aligned part.
                        self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                    if out_prefix is not None and sam_type in keep_read_types:
                        if sam.is_read1:
                            print(mapping.sam_to_fasta(sam), file=fa_out1)
                        else:
                            print(mapping.sam_to_fasta(sam), file=fa_out2)

                # Reset so the next record begins a fresh pair.
                previous_sam = None
        finally:
            if sam_reader is not None:
                sam_reader.close()
            for fa_out in (fa_out1, fa_out2):
                if fa_out is not None:
                    pyfastaq.utils.close(fa_out)

        total_bases_added = 0

        for ctg in self.contigs:
            left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
            if self.verbose:
                print('    extend contig ' +  ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
            self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
            total_bases_added += left_length + right_length

        return total_bases_added
Beispiel #4
0
    def test_get_pair_type(self):
        '''Test get_pair_type.

        Walks mapping_test.smalt.out.bam two records at a time and asserts
        that mapping.get_pair_type gives the expected pair of read types for
        each consecutive pair of records.
        '''
        expected = [
            (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
            (mapping.KEEP, mapping.CAN_EXTEND_RIGHT),
            (mapping.KEEP, mapping.KEEP),
            (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
            (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
            (mapping.BOTH_UNMAPPED, mapping.BOTH_UNMAPPED),
            (mapping.NOT_USEFUL, mapping.NOT_USEFUL)
        ]

        sam_reader = pysam.Samfile(os.path.join(data_dir, 'mapping_test.smalt.out.bam'), "rb")
        # try/finally guarantees the BAM handle is released even when one of
        # the assertions below fails.
        try:
            previous_sam = None
            i = 0
            for sam in sam_reader.fetch(until_eof=True):
                # First record of a pair: remember it and wait for the mate.
                if previous_sam is None:
                    previous_sam = sam
                    continue

                # 190 / 1000: presumably reference length and max insert size
                # -- confirm against mapping.get_pair_type.
                types = mapping.get_pair_type(previous_sam, sam, 190, 1000, min_clip=2)
                self.assertEqual(types, expected[i])
                i += 1
                # Pair done; start collecting the next one.
                previous_sam = None
            # NOTE(review): there is no check that all of `expected` was used,
            # so a BAM with fewer pairs would still pass.
        finally:
            sam_reader.close()