def aligned_segment(self):
    """Return a ``pysam.AlignedSegment`` populated from this object's fields.

    The query name is ``str(self.filename)``.  Sequence, flag, position,
    mapping quality, CIGAR, mate position and template length are copied
    from ``self.seq``, ``self.flag``, ``self.pos``, ``self.mapq``,
    ``self.cigar``, ``self.pnext`` and ``self.tlen``.  Qualities are
    produced by ``pysam.fromQualityString(self.qual)``, and two tags are
    attached: ``ES`` (``self.es``) and ``RP`` (``self.path_to_read``).
    """
    segment = pysam.AlignedSegment()

    # Identity and sequence.
    segment.query_name = str(self.filename)
    segment.query_sequence = self.seq

    # Alignment description.
    segment.flag = self.flag
    segment.reference_start = self.pos
    segment.mapping_quality = self.mapq
    segment.cigar = self.cigar
    segment.next_reference_start = self.pnext
    segment.template_length = self.tlen

    # Qualities are assigned after the sequence, as in the field order above.
    qualities = pysam.fromQualityString(self.qual)
    segment.query_qualities = qualities

    es_tag = ("ES", self.es)
    rp_tag = ("RP", self.path_to_read)
    segment.tags = (es_tag, rp_tag)
    return segment
def aligned_segment(self):
    """Build a ``pysam.AlignedSegment`` mirroring this object's attributes.

    Every attribute listed in ``assignments`` is set on a fresh segment, in
    the order given.  The qualities come from
    ``pysam.fromQualityString(self.qual)`` and the tags are ``ES``
    (``self.es``) and ``RP`` (``self.path_to_read``).

    Returns the populated segment.
    """
    record = pysam.AlignedSegment()

    # (segment attribute, value) pairs, applied strictly in this order.
    assignments = (
        ("query_name", str(self.filename)),
        ("query_sequence", self.seq),
        ("flag", self.flag),
        ("reference_start", self.pos),
        ("mapping_quality", self.mapq),
        ("cigar", self.cigar),
        ("next_reference_start", self.pnext),
        ("template_length", self.tlen),
        ("query_qualities", pysam.fromQualityString(self.qual)),
        ("tags", (("ES", self.es), ("RP", self.path_to_read))),
    )
    for attribute, value in assignments:
        setattr(record, attribute, value)
    return record
def test_get_aligned_pairs_padding(self):
    '''get_aligned_pairs() raises NotImplementedError for the padding case.

    The read carries the CIGAR ``((7, 20), (6, 1), (8, 19))``.
    '''
    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((7, 20), (6, 1), (8, 19))
    read.query_qualities = pysam.fromQualityString("1234") * 10

    # Padding is not being handled right now, so the call must raise.
    self.assertRaises(NotImplementedError, read.get_aligned_pairs)
def test_get_aligned_pairs_match_mismatch(self):
    '''Aligned pairs for a read whose CIGAR is ``((7, 20), (8, 20))``.

    All 40 query positions are expected to pair with reference positions
    20..59, both for the default call and for ``get_aligned_pairs(True)``.
    '''
    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((7, 20), (8, 20))
    read.query_qualities = pysam.fromQualityString("1234") * 10

    query_positions = range(0, 0 + 40)
    reference_positions = range(20, 20 + 40)
    expected_pairs = list(zip(query_positions, reference_positions))

    self.assertEqual(read.get_aligned_pairs(), expected_pairs)
    self.assertEqual(read.get_aligned_pairs(True), expected_pairs)
def testUpdate2(self):
    '''issue 135: in-place update of sequence and quality scores.

    Updating only the sequence does not work, because setting the
    sequence erases the quality scores; the qualities have to be
    assigned again after the sequence has been replaced.
    '''
    # Replacing the sequence alone leaves no quality string behind.
    read = self.buildRead()
    read.query_sequence = read.query_sequence[5:10]
    self.assertEqual(pysam.toQualityString(read.query_qualities), None)

    # Capture the qualities first, then restore the matching slice.
    read = self.buildRead()
    quality_string = pysam.toQualityString(read.query_qualities)
    read.query_sequence = read.query_sequence[5:10]
    read.query_qualities = pysam.fromQualityString(quality_string[5:10])
    self.assertEqual(
        pysam.toQualityString(read.query_qualities),
        quality_string[5:10])
def test_get_aligned_pairs_hard_clipping(self):
    '''Aligned pairs for a read whose CIGAR is ``((5, 2), (0, 35), (5, 3))``.

    Only the 35-long middle operation is expected to yield pairs: query
    positions 0..34 against reference positions 20..54, for both the
    default call and ``get_aligned_pairs(True)``.
    '''
    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((5, 2), (0, 35), (5, 3))
    read.query_qualities = pysam.fromQualityString("1234") * 10

    # No seq, no seq pos: the clipped ends contribute no pairs.
    expected_pairs = list(zip(range(0, 0 + 35), range(20, 20 + 35)))

    self.assertEqual(read.get_aligned_pairs(), expected_pairs)
    self.assertEqual(read.get_aligned_pairs(True), expected_pairs)
def testLargeRead(self):
    '''Build and return a large example read.

    The read has an 800-base sequence ("ACGT" repeated 200 times), a
    single CIGAR operation ``(0, 800)`` and a quality array obtained by
    repeating the decoded string "1234" 200 times.
    '''
    repeats = 200

    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * repeats
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((0, 4 * repeats), )
    read.next_reference_id = 0
    read.next_reference_start = 200
    read.template_length = 167
    read.query_qualities = pysam.fromQualityString("1234") * repeats
    return read
def buildRead(self):
    '''Build and return an example read.

    The read has a 40-base sequence ("ACGT" repeated 10 times), the CIGAR
    ``((0, 10), (2, 1), (0, 9), (1, 1), (0, 20))``, mate fields pointing at
    reference 0 / position 200, and qualities decoded from "1234"
    repeated 10 times.  No tags are set.
    '''
    cigar = ((0, 10), (2, 1), (0, 9), (1, 1), (0, 20))

    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = cigar
    read.next_reference_id = 0
    read.next_reference_start = 200
    read.template_length = 167
    read.query_qualities = pysam.fromQualityString("1234") * 10
    # todo: create tags
    return read
def test_get_aligned_pairs_skip(self):
    '''Aligned pairs for a read whose CIGAR is ``((0, 2), (3, 100), (0, 38))``.

    The default call is expected to report the 100 reference positions of
    the middle operation as ``(None, refpos)``; ``get_aligned_pairs(True)``
    is expected to leave those pairs out.
    '''
    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((0, 2), (3, 100), (0, 38))
    read.query_qualities = pysam.fromQualityString("1234") * 10

    leading_pairs = [(0, 20), (1, 21)]
    skipped_pairs = [(None, refpos) for refpos in range(22, 22 + 100)]
    trailing_pairs = list(zip(
        range(2, 2 + 38),
        range(20 + 2 + 100, 20 + 2 + 100 + 38)))

    self.assertEqual(
        read.get_aligned_pairs(),
        leading_pairs + skipped_pairs + trailing_pairs)

    # With the flag set, the (None, refpos) pairs are not expected.
    self.assertEqual(
        read.get_aligned_pairs(True),
        leading_pairs + trailing_pairs)
def test_get_aligned_pairs_soft_clipping(self):
    '''Aligned pairs for a read whose CIGAR is ``((4, 2), (0, 35), (4, 3))``.

    The default call is expected to report the 2 leading and 3 trailing
    query positions as ``(qpos, None)``; ``get_aligned_pairs(True)`` is
    expected to return only the 35 pairs of the middle operation.
    '''
    read = pysam.AlignedSegment()
    read.query_name = "read_12345"
    read.query_sequence = "ACGT" * 10
    read.flag = 0
    read.reference_id = 0
    read.reference_start = 20
    read.mapping_quality = 20
    read.cigartuples = ((4, 2), (0, 35), (4, 3))
    read.query_qualities = pysam.fromQualityString("1234") * 10

    clipped_head = [(0, None), (1, None)]
    aligned_body = list(zip(range(2, 2 + 35), range(20, 20 + 35)))
    clipped_tail = [(37, None), (38, None), (39, None)]

    self.assertEqual(
        read.get_aligned_pairs(),
        clipped_head + aligned_body + clipped_tail)

    # With the flag set, neither clipped end is expected in the result.
    self.assertEqual(read.get_aligned_pairs(True), aligned_body)
def parse_line(self, line):
    """Parse one tab-separated alignment line into a ``Read``.

    The line is stripped and split on tabs.  Fields are consumed by index:
    0 qname, 1 flag, 2 rname, 3 pos, 4 mapq, 5 cigar, 6 rnext, 7 pnext,
    9 seq, 10 qual and, when present, 11+ tags.  Field 8 is not read.
    ``pos`` and ``pnext`` are stored minus one.

    Counters in ``self.count`` are updated as a side effect: "total" for
    every call, "secondary" for reads flagged ``is_secondary``, "primary"
    for every read that is returned, and "invalid" for lines that could
    not be parsed.

    Args:
        line: One alignment record as a tab-separated string.

    Returns:
        The ``Read`` built from the line, or ``None`` when the line is
        invalid or when the read is secondary and ``self.skip_secondary``
        is true.
    """
    self.count["total"] += 1

    # Any failure while parsing is reported and the line is skipped, so a
    # single non-standard line does not abort the caller's loop.
    try:
        split_line = line.strip().split("\t")

        # Explicit check instead of ``assert`` so the validation is not
        # stripped when Python runs with -O.
        if len(split_line) < 11:
            raise ValueError(
                "Invalid number of field in bam aligned sequence")

        read = Read(
            qname=split_line[0],
            flag=int(split_line[1]),
            rname=self.header.rname_to_refid(split_line[2]),
            pos=int(split_line[3]) - 1,
            mapq=int(split_line[4]),
            cigar=self._cigarstring_to_tuple(split_line[5]),
            rnext=self.header.rname_to_refid(split_line[6]),
            pnext=int(split_line[7]) - 1,
            seq=split_line[9],
            qual=pysam.fromQualityString(split_line[10]),
            tags=(self._parse_tags(split_line[11:])
                  if len(split_line) >= 12 else tuple()))

        # Skip the read if it is secondary and skipping was requested.
        if read.is_secondary:
            self.count["secondary"] += 1
            if self.skip_secondary:
                return None

        # NOTE(review): a secondary read that is not skipped is counted
        # under both "secondary" and "primary" -- confirm this is intended.
        self.count["primary"] += 1
        return read

    except Exception as E:
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print(E)
        print("Invalid sequence line in bam: {}".format(line))
        self.count["invalid"] += 1
        return None
import pysam

from mergedups.pileup import PileupElement, Pileup

__author__ = 'dankle'


def _new_read(sequence, quality_string):
    """Return a fresh AlignedSegment with the given sequence and qualities."""
    segment = pysam.AlignedSegment()
    segment.query_sequence = sequence
    segment.query_qualities = pysam.fromQualityString(quality_string)
    return segment


# Module-level fixture reads.  Only my_read carries alignment fields and
# tags; the other reads have just a sequence and qualities.
my_read = _new_read("AGCT", "FGHI")
my_read.reference_id = 0
my_read.reference_start = 32
my_read.mpos = 250
my_read.is_reverse = True
my_read.tags = (("NM", 1), ("RG", "L1"))

# my_read2 is truncated by one base
my_read2 = _new_read("GGC", "FGH")

my_read3 = _new_read("AACT", "FGHI")

my_read4 = _new_read("GGCT", "FGHI")


def test_pileup_element():
    """A PileupElement built from ('A', 40) reports phred quality 'I'."""
    element = PileupElement('A', 40)
    quality_char = element.phredqual()
    assert quality_char == 'I'