Esempio n. 1
0
 def aligned_segment(self):
     """Build a ``pysam.AlignedSegment`` populated from this object's fields.

     The query name is the string form of ``self.filename``; the ``ES`` and
     ``RP`` tags carry ``self.es`` and ``self.path_to_read`` respectively.

     Returns:
         The newly created ``pysam.AlignedSegment``.
     """
     segment = pysam.AlignedSegment()
     segment.query_name = str(self.filename)
     # The sequence is assigned before the qualities on purpose: the
     # qualities are set last among the core fields, as in the original.
     segment.query_sequence = self.seq
     segment.flag = self.flag
     segment.reference_start = self.pos
     segment.mapping_quality = self.mapq
     segment.cigar = self.cigar
     segment.next_reference_start = self.pnext
     segment.template_length = self.tlen

     decoded_qualities = pysam.fromQualityString(self.qual)
     segment.query_qualities = decoded_qualities

     extra_tags = (("ES", self.es), ("RP", self.path_to_read))
     segment.tags = extra_tags
     return segment
Esempio n. 2
0
 def aligned_segment(self):
     """Return a new ``pysam.AlignedSegment`` mirroring this record.

     Core alignment fields are copied from the matching attributes of
     ``self`` (``seq``, ``flag``, ``pos``, ``mapq``, ``cigar``, ``pnext``,
     ``tlen``, ``qual``); ``self.filename`` becomes the query name and
     ``self.es`` / ``self.path_to_read`` are stored as the ``ES`` / ``RP``
     tags.
     """
     aligned = pysam.AlignedSegment()

     # Identity and sequence.
     aligned.query_name = str(self.filename)
     aligned.query_sequence = self.seq

     # Alignment coordinates and flags.
     aligned.flag = self.flag
     aligned.reference_start = self.pos
     aligned.mapping_quality = self.mapq
     aligned.cigar = self.cigar
     aligned.next_reference_start = self.pnext
     aligned.template_length = self.tlen

     # Qualities are decoded from their string form and set after the
     # sequence, keeping the original assignment order.
     aligned.query_qualities = pysam.fromQualityString(self.qual)

     es_tag = ("ES", self.es)
     rp_tag = ("RP", self.path_to_read)
     aligned.tags = (es_tag, rp_tag)
     return aligned
    def test_get_aligned_pairs_padding(self):
        '''get_aligned_pairs() must raise NotImplementedError when the
        CIGAR contains a padding operation.'''
        read = pysam.AlignedSegment()
        read.query_name = "read_12345"
        read.query_sequence = "ACGT" * 10
        read.flag = 0
        read.reference_id = 0
        read.reference_start = 20
        read.mapping_quality = 20
        # CIGAR op codes: 7 is '=', 6 is 'P' (padding), 8 is 'X'.
        read.cigartuples = ((7, 20), (6, 1), (8, 19))
        read.query_qualities = pysam.fromQualityString("1234") * 10

        # Padding is not being handled right now, so the call is expected
        # to raise rather than return pairs.
        self.assertRaises(NotImplementedError, read.get_aligned_pairs)
 def test_get_aligned_pairs_match_mismatch(self):
     '''Sequence-match and sequence-mismatch CIGAR operations both
     produce one (query, reference) pair per base.'''
     read = pysam.AlignedSegment()
     read.query_name = "read_12345"
     read.query_sequence = "ACGT" * 10
     read.flag = 0
     read.reference_id = 0
     read.reference_start = 20
     read.mapping_quality = 20
     # CIGAR op codes: 7 is '=' (match), 8 is 'X' (mismatch).
     read.cigartuples = ((7, 20), (8, 20))
     read.query_qualities = pysam.fromQualityString("1234") * 10

     # All 40 query positions pair up with reference positions 20..59;
     # the same list is expected with and without the True argument.
     expected = list(zip(range(0, 0 + 40), range(20, 20 + 40)))
     self.assertEqual(read.get_aligned_pairs(), expected)
     self.assertEqual(read.get_aligned_pairs(True), expected)
Esempio n. 5
0
    def testUpdate2(self):
        '''issue 135: in-place update of sequence and quality score.

        Assigning a new sequence erases the quality scores, so a plain
        in-place update does not work: the qualities have to be captured
        before the sequence is replaced and re-assigned afterwards.
        '''
        # Replacing only the sequence leaves the read without qualities.
        read = self.buildRead()
        read.query_sequence = read.query_sequence[5:10]
        self.assertEqual(pysam.toQualityString(read.query_qualities), None)

        # Save the qualities first, then restore the matching slice.
        read = self.buildRead()
        saved_quals = pysam.toQualityString(read.query_qualities)
        read.query_sequence = read.query_sequence[5:10]
        read.query_qualities = pysam.fromQualityString(saved_quals[5:10])

        restored = pysam.toQualityString(read.query_qualities)
        self.assertEqual(restored, saved_quals[5:10])
 def test_get_aligned_pairs_hard_clipping(self):
     '''Hard-clipped ends contribute no entries to get_aligned_pairs().'''
     read = pysam.AlignedSegment()
     read.query_name = "read_12345"
     read.query_sequence = "ACGT" * 10
     read.flag = 0
     read.reference_id = 0
     read.reference_start = 20
     read.mapping_quality = 20
     # CIGAR op codes: 5 is 'H' (hard clip), 0 is 'M'.
     read.cigartuples = ((5, 2), (0, 35), (5, 3))
     read.query_qualities = pysam.fromQualityString("1234") * 10

     # No sequence for the clipped bases, hence no sequence position:
     # only the 35 aligned bases appear, with query positions from 0.
     expected = list(zip(range(0, 0 + 35), range(20, 20 + 35)))
     self.assertEqual(read.get_aligned_pairs(), expected)
     self.assertEqual(read.get_aligned_pairs(True), expected)
Esempio n. 7
0
    def testLargeRead(self):
        '''Build an 800-base example read and return it.

        The read is "ACGT" repeated 200 times, aligned at reference
        position 20 with a single 800-long CIGAR operation of type 0.
        '''
        read_length = 4 * 200

        read = pysam.AlignedSegment()
        read.query_name = "read_12345"
        read.query_sequence = "ACGT" * 200
        read.flag = 0
        read.reference_id = 0
        read.reference_start = 20
        read.mapping_quality = 20
        read.cigartuples = ((0, read_length), )
        read.next_reference_id = 0
        read.next_reference_start = 200
        read.template_length = 167
        read.query_qualities = pysam.fromQualityString("1234") * 200

        return read
Esempio n. 8
0
    def buildRead(self):
        '''Build the 40-base example read used as a test fixture.

        Returns a fresh ``pysam.AlignedSegment`` on every call, with a
        CIGAR made of op 0, op 2, op 0, op 1 and op 0 segments.
        '''
        read = pysam.AlignedSegment()
        read.query_name = "read_12345"
        read.query_sequence = "ACGT" * 10
        read.flag = 0
        read.reference_id = 0
        read.reference_start = 20
        read.mapping_quality = 20
        read.cigartuples = (
            (0, 10),
            (2, 1),
            (0, 9),
            (1, 1),
            (0, 20),
        )
        read.next_reference_id = 0
        read.next_reference_start = 200
        read.template_length = 167
        read.query_qualities = pysam.fromQualityString("1234") * 10
        # todo: create tags
        return read
 def test_get_aligned_pairs_skip(self):
     '''A skipped reference region yields (None, refpos) pairs, which
     are dropped when get_aligned_pairs() is called with True.'''
     read = pysam.AlignedSegment()
     read.query_name = "read_12345"
     read.query_sequence = "ACGT" * 10
     read.flag = 0
     read.reference_id = 0
     read.reference_start = 20
     read.mapping_quality = 20
     # CIGAR op codes: 0 is 'M', 3 is 'N' (skipped region).
     read.cigartuples = ((0, 2), (3, 100), (0, 38))
     read.query_qualities = pysam.fromQualityString("1234") * 10

     before_skip = [(0, 20), (1, 21)]
     skipped = [(None, refpos) for refpos in range(22, 22 + 100)]
     after_skip = list(zip(
         range(2, 2 + 38),
         range(20 + 2 + 100, 20 + 2 + 100 + 38)))

     self.assertEqual(read.get_aligned_pairs(),
                      before_skip + skipped + after_skip)
     # With True, the (None, refpos) entries for the skip are left out.
     self.assertEqual(read.get_aligned_pairs(True),
                      before_skip + after_skip)
Esempio n. 10
0
 def test_get_aligned_pairs_soft_clipping(self):
     '''Soft-clipped bases appear as (qpos, None) pairs, which are
     dropped when get_aligned_pairs() is called with True.'''
     read = pysam.AlignedSegment()
     read.query_name = "read_12345"
     read.query_sequence = "ACGT" * 10
     read.flag = 0
     read.reference_id = 0
     read.reference_start = 20
     read.mapping_quality = 20
     # CIGAR op codes: 4 is 'S' (soft clip), 0 is 'M'.
     read.cigartuples = ((4, 2), (0, 35), (4, 3))
     read.query_qualities = pysam.fromQualityString("1234") * 10

     clipped_head = [(0, None), (1, None)]
     aligned = list(zip(range(2, 2 + 35), range(20, 20 + 35)))
     clipped_tail = [(37, None), (38, None), (39, None)]

     self.assertEqual(read.get_aligned_pairs(),
                      clipped_head + aligned + clipped_tail)
     # With True, only the pairs that have a reference position remain.
     self.assertEqual(read.get_aligned_pairs(True), aligned)
Esempio n. 11
0
    def parse_line(self, line):
        """Parse one tab-separated alignment line into a ``Read``.

        The line is stripped and split on tabs. Fields 0-7, 9 and 10 are
        mapped to ``qname``, ``flag``, ``rname``, ``pos``, ``mapq``,
        ``cigar``, ``rnext``, ``pnext``, ``seq`` and ``qual``; any fields
        from index 11 onwards are parsed as tags. Field 8 is not used.
        ``pos`` and ``pnext`` are decremented by one before being stored,
        and reference names are converted through
        ``self.header.rname_to_refid``.

        Counters in ``self.count`` are updated as a side effect:
        ``total`` for every call, ``secondary`` for secondary reads,
        ``primary`` for every read that is returned and ``invalid`` for
        lines that could not be parsed.

        Args:
            line: One raw text line describing an aligned sequence.

        Returns:
            The parsed ``Read``, or ``None`` when the line is invalid or
            when the read is secondary and ``self.skip_secondary`` is set.
        """
        self.count["total"] += 1
        # Deliberately broad try block: a non-standard line is reported and
        # counted, and the caller simply moves on to the next line.
        try:
            # Split the line and verify the number of fields.
            split_line = line.strip().split("\t")
            if len(split_line) < 11:
                # Explicit raise instead of ``assert`` so the check is
                # still performed when Python runs with -O.
                raise ValueError(
                    "Invalid number of field in bam aligned sequence")

            # Init an AlignedSegment
            read = Read(
                qname=split_line[0],
                flag=int(split_line[1]),
                rname=self.header.rname_to_refid(split_line[2]),
                pos=int(split_line[3]) - 1,
                mapq=int(split_line[4]),
                cigar=self._cigarstring_to_tuple(split_line[5]),
                rnext=self.header.rname_to_refid(split_line[6]),
                pnext=int(split_line[7]) - 1,
                seq=split_line[9],
                qual=pysam.fromQualityString(split_line[10]),
                tags=(self._parse_tags(split_line[11:])
                      if len(split_line) >= 12 else tuple()))

            # Skip the read if it is secondary and skipping was requested.
            if read.is_secondary:
                self.count["secondary"] += 1
                if self.skip_secondary:
                    return None

            # NOTE(review): a secondary read that is kept is counted under
            # both "secondary" and "primary" - confirm this is intended.
            self.count["primary"] += 1
            return read

        except Exception as E:
            # Single-argument parenthesised print gives the same output on
            # Python 2 (print statement) and Python 3 (print function).
            print(E)
            print("Invalid sequence line in bam: {}".format(line))
            self.count["invalid"] += 1
            return None
Esempio n. 12
0
import pysam
from mergedups.pileup import PileupElement, Pileup

__author__ = 'dankle'

# Module-level example reads, built once at import time
# (presumably shared by tests further down the file - verify).

# my_read: 4-base read "AGCT" with qualities, a reference id and start
# position, the reverse flag set, and NM / RG tags.
my_read = pysam.AlignedSegment()
my_read.query_sequence="AGCT"
my_read.query_qualities = pysam.fromQualityString("FGHI")
my_read.reference_id = 0
my_read.reference_start = 32
# NOTE(review): `mpos` looks like the legacy pysam name for the mate
# start position - confirm against the pysam version in use.
my_read.mpos = 250
my_read.is_reverse = True
my_read.tags = (("NM", 1), ("RG", "L1"))

# my_read2 is truncated by one base: it has 3 bases and 3 qualities, and
# its sequence "GGC" is the first three bases of my_read4's "GGCT".
my_read2 = pysam.AlignedSegment()
my_read2.query_sequence="GGC"
my_read2.query_qualities = pysam.fromQualityString("FGH")

# my_read3: same qualities as my_read, sequence "AACT" differs from
# my_read's "AGCT" at the second base.
my_read3 = pysam.AlignedSegment()
my_read3.query_sequence="AACT"
my_read3.query_qualities = pysam.fromQualityString("FGHI")

# my_read4: same qualities as my_read, sequence "GGCT" differs from
# my_read's "AGCT" at the first base.
my_read4 = pysam.AlignedSegment()
my_read4.query_sequence="GGCT"
my_read4.query_qualities = pysam.fromQualityString("FGHI")


def test_pileup_element():
    """A PileupElement built with ('A', 40) reports 'I' from phredqual()."""
    element = PileupElement('A', 40)
    # 'I' is chr(40 + 33); presumably a Phred+33 encoding - confirm in
    # PileupElement.phredqual.
    encoded = element.phredqual()
    assert encoded == 'I'