def test_get_pair_type(self):
    '''Test get_pair_type on consecutive read pairs from the test BAM.

    Records are read two at a time, in file order, from
    mapping_test.smalt.out.bam. Each pair is passed to
    mapping.get_pair_type (with 190, 1000 and min_clip=2) and the results
    are compared, in order, with the expected list. Comparing the whole
    list also catches a BAM file that yields fewer or more pairs than
    expected.
    '''
    expected = [
        (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
        (mapping.KEEP, mapping.CAN_EXTEND_RIGHT),
        (mapping.KEEP, mapping.KEEP),
        (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
        (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
        (mapping.BOTH_UNMAPPED, mapping.BOTH_UNMAPPED),
        (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
    ]
    bam_file = os.path.join(data_dir, 'mapping_test.smalt.out.bam')
    sam_reader = pysam.Samfile(bam_file, "rb")
    try:
        reads = iter(sam_reader.fetch(until_eof=True))
        # Zipping one iterator with itself yields consecutive
        # (first, second) records; a trailing unpaired record is dropped.
        got = [
            mapping.get_pair_type(previous_sam, sam, 190, 1000, min_clip=2)
            for previous_sam, sam in zip(reads, reads)
        ]
    finally:
        # Release the BAM file handle even if get_pair_type raises.
        sam_reader.close()

    self.assertEqual(got, expected)
def _extend_contigs_with_bam(self, bam_in, out_prefix=None, output_all_useful_reads=False):
    '''Extend the contigs using the read pairs in a BAM file.

    Records are read two at a time, in file order, and each pair is
    classified with mapping.get_pair_type. A read of type CAN_EXTEND_LEFT
    contributes the first soft-clipped part of its sequence as a left kmer
    of the contig it is mapped to; a read of type CAN_EXTEND_RIGHT
    contributes the sequence from sam.qend onwards as a right kmer. Once
    all reads are processed, every contig in self.contigs is extended and
    the resulting lengths are appended to self.contig_lengths.

    bam_in: name of the BAM file, opened with pysam.Samfile.
    out_prefix: if not None, the output of mapping.sam_to_fasta is written
        to out_prefix + '_1.fa' for reads with is_read1 set and to
        out_prefix + '_2.fa' for all other reads, for each read whose type
        is CAN_EXTEND_LEFT, CAN_EXTEND_RIGHT or KEEP.
    output_all_useful_reads: if True, reads of type BOTH_UNMAPPED are
        written to the output files as well.

    Returns the total number of bases added, summed over the left and
    right extension lengths of all contigs.
    '''
    keep_read_types = {mapping.CAN_EXTEND_LEFT, mapping.CAN_EXTEND_RIGHT, mapping.KEEP}
    if output_all_useful_reads:
        keep_read_types.add(mapping.BOTH_UNMAPPED)

    fa_out1 = None
    fa_out2 = None
    sam_reader = None

    try:
        if out_prefix is not None:
            fa_out1 = pyfastaq.utils.open_file_write(out_prefix + '_1.fa')
            fa_out2 = pyfastaq.utils.open_file_write(out_prefix + '_2.fa')

        sam_reader = pysam.Samfile(bam_in, "rb")
        reads = iter(sam_reader.fetch(until_eof=True))

        # Zipping one iterator with itself yields consecutive
        # (first, second) records; a trailing unpaired record is dropped.
        for previous_sam, current_sam in zip(reads, reads):
            ref_length = self._get_ref_length_sam_pair(sam_reader, previous_sam, current_sam)
            previous_type, current_type = mapping.get_pair_type(
                previous_sam,
                current_sam,
                ref_length,
                self.max_insert,
                min_clip=self.min_clip,
            )

            for sam, sam_type in ((previous_sam, previous_type), (current_sam, current_type)):
                if sam_type == mapping.CAN_EXTEND_LEFT:
                    name = mapping.get_ref_name(sam, sam_reader)
                    clipped = mapping.soft_clipped(sam)[0]
                    self.contigs[name].add_left_kmer(common.decode(sam.seq[:clipped]))
                elif sam_type == mapping.CAN_EXTEND_RIGHT:
                    name = mapping.get_ref_name(sam, sam_reader)
                    self.contigs[name].add_right_kmer(common.decode(sam.seq[sam.qend:]))

                if out_prefix is not None and sam_type in keep_read_types:
                    fa_out = fa_out1 if sam.is_read1 else fa_out2
                    print(mapping.sam_to_fasta(sam), file=fa_out)
    finally:
        # Close whatever was opened, even if reading or classifying a pair
        # raised part way through the BAM file.
        if sam_reader is not None:
            sam_reader.close()
        for handle in (fa_out1, fa_out2):
            if handle is not None:
                pyfastaq.utils.close(handle)

    total_bases_added = 0

    for ctg in self.contigs:
        left_length, right_length = self.contigs[ctg].extend(self.ext_min_cov, self.ext_min_ratio, self.ext_bases)
        if self.verbose:
            print(' extend contig ' + ctg, 'new_length:' + str(len(self.contigs[ctg])), 'added_left:' + str(left_length), 'added_right:' + str(right_length), sep='\t')
        # One [new length, bases added left, bases added right] entry is
        # recorded per call for each contig.
        self.contig_lengths[ctg].append([len(self.contigs[ctg]), left_length, right_length])
        total_bases_added += left_length + right_length

    return total_bases_added
def test_get_pair_type(self):
    '''Test get_pair_type on consecutive read pairs from the test BAM.

    Records are read two at a time, in file order, from
    mapping_test.smalt.out.bam. Each pair is passed to
    mapping.get_pair_type (with 190, 1000 and min_clip=2) and the results
    are compared, in order, with the expected list. Comparing the whole
    list also catches a BAM file that yields fewer or more pairs than
    expected.
    '''
    expected = [
        (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
        (mapping.KEEP, mapping.CAN_EXTEND_RIGHT),
        (mapping.KEEP, mapping.KEEP),
        (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
        (mapping.CAN_EXTEND_LEFT, mapping.KEEP),
        (mapping.BOTH_UNMAPPED, mapping.BOTH_UNMAPPED),
        (mapping.NOT_USEFUL, mapping.NOT_USEFUL),
    ]
    bam_file = os.path.join(data_dir, 'mapping_test.smalt.out.bam')
    sam_reader = pysam.Samfile(bam_file, "rb")
    try:
        reads = iter(sam_reader.fetch(until_eof=True))
        # Zipping one iterator with itself yields consecutive
        # (first, second) records; a trailing unpaired record is dropped.
        got = [
            mapping.get_pair_type(previous_sam, sam, 190, 1000, min_clip=2)
            for previous_sam, sam in zip(reads, reads)
        ]
    finally:
        # Release the BAM file handle even if get_pair_type raises.
        sam_reader.close()

    self.assertEqual(got, expected)