def test_fromString(self):
    """Parse CIGAR strings and verify each operation's character and length."""
    oneOpText = "100="
    sixOpText = "100=2D34I6=6X6="

    oneOpCigar = PacBioBam.Cigar(oneOpText)
    sixOpCigar = PacBioBam.Cigar(sixOpText)

    # single-operation CIGAR
    self.assertEqual(1, len(oneOpCigar))
    onlyOp = oneOpCigar[0]
    self.assertEqual('=', onlyOp.Char())
    self.assertEqual(100, onlyOp.Length())

    # multi-operation CIGAR: all characters are checked first, then all lengths
    self.assertEqual(6, len(sixOpCigar))
    ops = [sixOpCigar[idx] for idx in range(6)]
    for expectedChar, op in zip("=DI=X=", ops):
        self.assertEqual(expectedChar, op.Char())
    for expectedLength, op in zip((100, 2, 34, 6, 6, 6), ops):
        self.assertEqual(expectedLength, op.Length())
def test_copy(self):
    """A PositionInterval equals itself, its copy-constructed twin, and an alias."""
    source = PacBioBam.PositionInterval(5, 8)
    copied = PacBioBam.PositionInterval(source)
    alias = source

    for other in (source, copied, alias):
        self.assertEqual(source, other)
def test_copy(self):
    """A GenomicInterval equals itself, its copy-constructed twin, and an alias."""
    source = PacBioBam.GenomicInterval("foo", 10, 20)
    copied = PacBioBam.GenomicInterval(source)
    alias = source

    for other in (source, copied, alias):
        self.assertEqual(source, other)
def test_ctors(self):
    """Frames is empty by default and round-trips the values it is built from."""
    defaulted = PacBioBam.Frames()
    self.assertEqual(0, len(defaulted.Data()))

    # self.testframes is supplied by the test fixture
    populated = PacBioBam.Frames(self.testframes)
    stored = populated.Data()
    self.assertEqual(len(self.testframes), len(stored))
    for position, value in enumerate(stored):
        self.assertEqual(int(self.testframes[position]), int(value))
def test_modifiers(self):
    """Changing Start/Stop on a copied PositionInterval leaves it unequal to the source."""
    untouched = PacBioBam.PositionInterval(5, 8)
    edited = PacBioBam.PositionInterval(untouched)

    edited.Start(2)
    edited.Stop(10)

    self.assertNotEqual(untouched, edited)
    self.assertEqual(2, edited.Start())
    self.assertEqual(10, edited.Stop())
def test_cigarOpCtors(self):
    """CigarOperation built from a character or from a type constant agree."""
    fromChar = PacBioBam.CigarOperation('S', 10)
    fromType = PacBioBam.CigarOperation(PacBioBam.SOFT_CLIP, 10)
    bothOps = (fromChar, fromType)

    for op in bothOps:
        self.assertEqual('S', op.Char())
    for op in bothOps:
        self.assertEqual(PacBioBam.SOFT_CLIP, op.Type())
    for op in bothOps:
        self.assertEqual(10, op.Length())
def test_fromNumbers(self):
    """QualityValues assembled from numeric scores renders the expected FASTQ string."""
    expectedFastq = "~~~KKBB!!"
    rawScores = [93, 93, 93, 42, 42, 33, 33, 0, 0]

    qualities = PacBioBam.QualityValues()
    for score in rawScores:
        qualities.append(PacBioBam.QualityValue(score))

    self.assertEqual(expectedFastq, qualities.Fastq())
def test_setType(self):
    """Setting a CigarOperation's type makes Char() report the matching character."""
    # (type constant, expected character)
    expectations = (
        (PacBioBam.ALIGNMENT_MATCH, 'M'),
        (PacBioBam.INSERTION, 'I'),
        (PacBioBam.DELETION, 'D'),
        (PacBioBam.REFERENCE_SKIP, 'N'),
        (PacBioBam.SOFT_CLIP, 'S'),
        (PacBioBam.HARD_CLIP, 'H'),
        (PacBioBam.PADDING, 'P'),
        (PacBioBam.SEQUENCE_MATCH, '='),
        (PacBioBam.SEQUENCE_MISMATCH, 'X'),
    )

    # one default-constructed operation per expectation
    ops = [PacBioBam.CigarOperation() for _ in expectations]

    # set every type first, then verify every character
    for op, (opType, _) in zip(ops, expectations):
        op.Type(opType)
    for op, (_, opChar) in zip(ops, expectations):
        self.assertEqual(opChar, op.Char())
def test_setChar(self):
    """Setting a CigarOperation's character makes Type() report the matching constant."""
    # (character, expected type constant)
    expectations = (
        ('M', PacBioBam.ALIGNMENT_MATCH),
        ('I', PacBioBam.INSERTION),
        ('D', PacBioBam.DELETION),
        ('N', PacBioBam.REFERENCE_SKIP),
        ('S', PacBioBam.SOFT_CLIP),
        ('H', PacBioBam.HARD_CLIP),
        ('P', PacBioBam.PADDING),
        ('=', PacBioBam.SEQUENCE_MATCH),
        ('X', PacBioBam.SEQUENCE_MISMATCH),
    )

    # one default-constructed operation per expectation
    ops = [PacBioBam.CigarOperation() for _ in expectations]

    # set every character first, then verify every type
    for op, (opChar, _) in zip(ops, expectations):
        op.Char(opChar)
    for op, (_, opType) in zip(ops, expectations):
        self.assertEqual(opType, op.Type())
def test_ctors(self):
    """Check Start/Stop for the default, single-argument, and two-argument ctors."""
    defaulted = PacBioBam.PositionInterval()
    startOnly = PacBioBam.PositionInterval(4)
    startStop = PacBioBam.PositionInterval(5, 8)

    # (interval, expected start, expected stop)
    expectations = (
        (defaulted, 0, 0),
        (startOnly, 4, 5),
        (startStop, 5, 8),
    )
    for interval, start, stop in expectations:
        self.assertEqual(start, interval.Start())
        self.assertEqual(stop, interval.Stop())
def test_ctors(self):
    """Check Name/Start/Stop for default and fully-specified GenomicIntervals."""
    defaulted = PacBioBam.GenomicInterval()
    specified = PacBioBam.GenomicInterval("foo", 100, 200)

    # (interval, expected name, expected start, expected stop)
    expectations = (
        (defaulted, "", 0, 0),
        (specified, "foo", 100, 200),
    )
    for interval, name, start, stop in expectations:
        self.assertEqual(name, interval.Name())
        self.assertEqual(start, interval.Start())
        self.assertEqual(stop, interval.Stop())
def generatedNames(self):
    """Return the full names of every record in the generated BAM file.

    Opens a DataSet on ``self.generatedBamFn`` and walks it with an
    EntireFileQuery, collecting ``record.FullName()`` for each record.

    Returns:
        list of record names, in iteration order.

    A RuntimeError raised while opening or reading is reported as a test
    failure that includes the exception text.
    """
    try:
        dataset = PacBioBam.DataSet(self.generatedBamFn)
        entireFile = PacBioBam.EntireFileQuery(dataset)
        return [record.FullName() for record in PacBioBam.Iterate(entireFile)]
    except RuntimeError as e:
        # should not throw; surface the reason rather than a bare "False is not true"
        self.fail("reading generated BAM file raised RuntimeError: " + str(e))
def test_length(self):
    """PositionInterval.Length() equals stop minus start for several intervals."""
    bounds = ((2, 4), (3, 5), (6, 8), (1, 7), (5, 8))
    expectedLengths = (2, 2, 2, 6, 3)

    # build every interval up front, then check the lengths in order
    intervals = [PacBioBam.PositionInterval(start, stop) for start, stop in bounds]
    for expected, interval in zip(expectedLengths, intervals):
        self.assertEqual(expected, interval.Length())
def test_modifiers(self):
    """Exercise chained Name/Start/Stop setters and the Interval setter."""
    source = PacBioBam.GenomicInterval("foo", 10, 20)

    # copy, then modify every field through the chained setters
    rewritten = PacBioBam.GenomicInterval(source)
    rewritten.Name("bar").Start(2).Stop(10)

    # copy, then replace only the positional interval
    reranged = PacBioBam.GenomicInterval(source)
    reranged.Interval(rewritten.Interval())

    self.assertNotEqual(source, rewritten)
    self.assertEqual("bar", rewritten.Name())
    self.assertEqual(2, rewritten.Start())
    self.assertEqual(10, rewritten.Stop())

    # name is kept from the source; the interval comes from the rewritten copy
    self.assertEqual(source.Name(), reranged.Name())
    self.assertEqual(rewritten.Interval(), reranged.Interval())
def originalNames(self):
    """Copy the original BAM into the generated file and return its record names.

    Iterates over every record of ``self.ex2BamFn``, writes each one to a
    BamWriter opened on ``self.generatedBamFn`` (using the original file's
    header), and collects ``record.FullName()``.

    Returns:
        list of record names, in iteration order.

    A RuntimeError raised along the way is reported as a test failure that
    includes the exception text.
    """
    try:
        # named bamFile (not "file") to avoid shadowing the Python 2 builtin
        bamFile = PacBioBam.BamFile(self.ex2BamFn)
        writer = PacBioBam.BamWriter(self.generatedBamFn, bamFile.Header())

        dataset = PacBioBam.DataSet(self.ex2BamFn)
        entireFile = PacBioBam.EntireFileQuery(dataset)

        names_in = []
        for record in PacBioBam.Iterate(entireFile):
            names_in.append(record.FullName())
            writer.Write(record)
        # NOTE(review): presumably the writer flushes when it goes out of
        # scope on return - confirm against the BamWriter documentation
        return names_in
    except RuntimeError as e:
        # should not throw; surface the reason rather than a bare "False is not true"
        self.fail("copying original BAM file raised RuntimeError: " + str(e))
def test_equality(self):
    """PositionInterval equality: reflexive, value-based, and unequal when values differ."""
    blank = PacBioBam.PositionInterval()
    blankTwin = PacBioBam.PositionInterval()
    oneWide = PacBioBam.PositionInterval(4)
    oneWideTwin = PacBioBam.PositionInterval(4, 5)
    regular = PacBioBam.PositionInterval(5, 8)
    regularTwin = PacBioBam.PositionInterval(5, 8)
    farAway = PacBioBam.PositionInterval(20, 40)

    # every interval equals itself
    for interval in (blank, oneWide, regular, farAway):
        self.assertEqual(interval, interval)

    # separately constructed intervals with the same values are equal
    equalPairs = (
        (blank, blankTwin),
        (oneWide, oneWideTwin),
        (regular, regularTwin),
    )
    for left, right in equalPairs:
        self.assertEqual(left, right)

    # intervals with different values are not equal
    unequalPairs = (
        (blank, oneWide),
        (blank, regular),
        (blank, farAway),
        (oneWide, regular),
        (regular, farAway),
    )
    for left, right in unequalPairs:
        self.assertNotEqual(left, right)
def test_internalHqToOriginal(self):
    """Stitch the internal HQ/LQ region files and compare with the polymerase record."""
    # build the virtual polymerase record from the two region files
    hqPath = self.data.directory + "/polymerase/internal.hqregions.bam"
    lqPath = self.data.directory + "/polymerase/internal.lqregions.bam"
    stitcher = PacBioBam.VirtualPolymeraseReader(hqPath, lqPath)
    self.assertTrue(stitcher.HasNext())
    stitched = stitcher.Next()

    # read the first record of the original polymerase file
    polymeraseData = PacBioBam.DataSet(self.data.directory + "/polymerase/internal.polymerase.bam")
    query = PacBioBam.EntireFileQuery(polymeraseData)
    cursor = query.begin()
    sentinel = query.end()
    self.assertTrue(cursor != sentinel)
    original = cursor.value()

    # field-by-field comparison is delegated to self.compare
    self.compare(original, stitched)
def test_cover(self):
    """Covers/CoveredBy for nested, disjoint, touching, and end-sharing intervals."""
    low = PacBioBam.PositionInterval(2, 4)
    mid = PacBioBam.PositionInterval(3, 5)
    high = PacBioBam.PositionInterval(6, 8)
    wide = PacBioBam.PositionInterval(1, 7)
    tail = PacBioBam.PositionInterval(5, 8)

    #        0123456789
    # low      --
    # mid       --
    # high         --
    # wide    ------
    # tail        ---

    # an interval covers, and is covered by, itself
    self.assertTrue(low.Covers(low))
    self.assertTrue(low.CoveredBy(low))

    # mid lies inside wide, but not the other way round
    self.assertTrue(mid.CoveredBy(wide))
    self.assertTrue(wide.Covers(mid))
    self.assertNotEqual(mid, wide)
    self.assertFalse(mid.Covers(wide))

    # mid and high do not overlap at all
    self.assertFalse(mid.Covers(high))
    self.assertFalse(high.Covers(mid))
    self.assertFalse(mid.CoveredBy(high))
    self.assertFalse(high.CoveredBy(mid))

    # mid stops exactly where tail starts
    self.assertFalse(mid.Covers(tail))
    self.assertFalse(mid.CoveredBy(tail))

    # high shares tail's stop and starts inside tail
    self.assertTrue(tail.Covers(high))
    self.assertTrue(high.CoveredBy(tail))
def test_defaultCtor(self):
    """A default-constructed BamHeader is empty and rejects every lookup.

    Checks that version and sort order are falsy, that the read group,
    sequence, program, and comment collections are empty, and that each
    by-id / by-index lookup raises RuntimeError.
    """
    header = PacBioBam.BamHeader()

    self.assertFalse(header.Version())
    self.assertFalse(header.SortOrder())
    self.assertEqual(0, len(header.ReadGroups()))
    self.assertEqual(0, len(header.Sequences()))
    self.assertEqual(0, len(header.Programs()))
    self.assertEqual(0, len(header.Comments()))

    # Each lookup gets its own assertRaises check: inside one shared
    # assertRaises block, the first raising call ends the block and the
    # remaining lookups would never be executed.
    # NOTE(review): assumes the wrapper reports every failed lookup as
    # RuntimeError - confirm for the index-based lookups.
    self.assertRaises(RuntimeError, header.Program, "foo")
    self.assertRaises(RuntimeError, header.ReadGroup, "foo")
    self.assertRaises(RuntimeError, header.SequenceId, "foo")
    self.assertRaises(RuntimeError, header.SequenceLength, 42)
    self.assertRaises(RuntimeError, header.SequenceName, 42)
def test_toString(self):
    """Cigars assembled operation by operation serialize to the expected strings."""
    expectedSingle = "100="
    expectedMulti = "100=2D34I6=6X6="

    single = PacBioBam.Cigar()
    single.append(PacBioBam.CigarOperation(PacBioBam.SEQUENCE_MATCH, 100))

    # (operation type, length) in the order they appear in expectedMulti
    multiOps = (
        (PacBioBam.SEQUENCE_MATCH, 100),
        (PacBioBam.DELETION, 2),
        (PacBioBam.INSERTION, 34),
        (PacBioBam.SEQUENCE_MATCH, 6),
        (PacBioBam.SEQUENCE_MISMATCH, 6),
        (PacBioBam.SEQUENCE_MATCH, 6),
    )
    multi = PacBioBam.Cigar()
    for opType, opLength in multiOps:
        multi.append(PacBioBam.CigarOperation(opType, opLength))

    self.assertEqual(expectedSingle, single.ToStdString())
    self.assertEqual(expectedMulti, multi.ToStdString())
def test_clamp(self):
    """Accuracy values outside [0.0, 1.0] come back clamped to the nearest bound."""
    # (constructor argument, expected value after conversion to float)
    cases = (
        (0.0, 0.0),
        (-0.5, 0.0),
        (0.0, 0.0),
        (0.9, 0.9),
        (1.0, 1.0),
        (1.1, 1.0),
    )

    # construct every Accuracy first, then compare in order
    accuracies = [PacBioBam.Accuracy(raw) for raw, _ in cases]
    for accuracy, (_, expected) in zip(accuracies, cases):
        self.assertAlmostEqual(expected, float(accuracy))
def test_decode(self):
    """Build a BamHeader from SAM header text and verify every parsed section."""
    samText = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
               "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
               "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
               "@RG\tID:rg1\tSM:control\n"
               "@RG\tID:rg2\tSM:condition1\n"
               "@RG\tID:rg3\tSM:condition1\n"
               "@PG\tID:_foo_\tPN:ide\n"
               "@CO\tipsum and so on\n"
               "@CO\tcitation needed\n")

    header = PacBioBam.BamHeader(samText)

    # @HD
    self.assertEqual("1.1", header.Version())
    self.assertEqual("queryname", header.SortOrder())
    self.assertEqual("3.0.1", header.PacBioBamVersion())

    # @RG: (id, sample)
    readGroups = (("rg1", "control"), ("rg2", "condition1"), ("rg3", "condition1"))
    self.assertEqual(3, len(header.ReadGroups()))
    for rgId, _ in readGroups:
        self.assertTrue(header.HasReadGroup(rgId))
    for rgId, sample in readGroups:
        self.assertEqual(sample, header.ReadGroup(rgId).Sample())

    # @SQ: (name, species, length) - Length() is compared as a string
    sequences = (("chr1", "chocobo", "2038"), ("chr2", "chocobo", "3042"))
    self.assertEqual(2, len(header.Sequences()))
    for seqName, _, _ in sequences:
        self.assertTrue(header.HasSequence(seqName))
    for seqName, species, _ in sequences:
        self.assertEqual(species, header.Sequence(seqName).Species())
    for seqName, _, seqLength in sequences:
        self.assertEqual(seqLength, header.Sequence(seqName).Length())

    # @PG
    self.assertEqual(1, len(header.Programs()))
    self.assertTrue(header.HasProgram("_foo_"))
    self.assertEqual("ide", header.Program("_foo_").Name())

    # @CO
    self.assertEqual(2, len(header.Comments()))
    for position, comment in enumerate(("ipsum and so on", "citation needed")):
        self.assertEqual(comment, header.Comments()[position])
def test_encode(self):
    """Assemble a BamHeader piece by piece and verify its SAM text form."""
    expectedSam = ("@HD\tVN:1.1\tSO:queryname\tpb:3.0.1\n"
                   "@SQ\tSN:chr1\tLN:2038\tSP:chocobo\n"
                   "@SQ\tSN:chr2\tLN:3042\tSP:chocobo\n"
                   "@RG\tID:rg1\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:control\tPM:SEQUEL\n"
                   "@RG\tID:rg2\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                   "@RG\tID:rg3\tPL:PACBIO\tDS:READTYPE=UNKNOWN\tSM:condition1\tPM:SEQUEL\n"
                   "@PG\tID:_foo_\tPN:ide\n"
                   "@CO\tipsum and so on\n"
                   "@CO\tcitation needed\n")

    # read groups: (id, sample)
    readGroups = []
    for rgId, sample in (("rg1", "control"), ("rg2", "condition1"), ("rg3", "condition1")):
        readGroup = PacBioBam.ReadGroupInfo(rgId)
        readGroup.Sample(sample)
        readGroups.append(readGroup)

    # sequences: (name, length, species) - length is passed as a string
    sequences = []
    for seqName, seqLength, species in (("chr1", "2038", "chocobo"), ("chr2", "3042", "chocobo")):
        sequence = PacBioBam.SequenceInfo(seqName)
        sequence.Length(seqLength)
        sequence.Species(species)
        sequences.append(sequence)

    program = PacBioBam.ProgramInfo("_foo_")
    program.Name("ide")

    header = PacBioBam.BamHeader()
    header.Version("1.1")
    header.SortOrder("queryname")
    header.PacBioBamVersion("3.0.1")
    for readGroup in readGroups:
        header.AddReadGroup(readGroup)
    for sequence in sequences:
        header.AddSequence(sequence)
    header.AddProgram(program)
    for comment in ("ipsum and so on", "citation needed"):
        header.AddComment(comment)

    self.assertEqual(expectedSam, header.ToSam())
def test_validity(self):
    """A PositionInterval is valid only when start is strictly less than stop."""
    defaulted = PacBioBam.PositionInterval()         # default ctor
    emptyAtZero = PacBioBam.PositionInterval(0, 0)   # start == stop (zero)
    emptyAtFour = PacBioBam.PositionInterval(4, 4)   # start == stop (nonzero)
    fromZero = PacBioBam.PositionInterval(0, 1)      # start < stop (start is zero)
    fromFour = PacBioBam.PositionInterval(4, 5)      # start < stop (start is nonzero)
    backwards = PacBioBam.PositionInterval(5, 4)     # start > stop

    for interval in (defaulted, emptyAtZero, emptyAtFour):
        self.assertFalse(interval.IsValid())
    for interval in (fromZero, fromFour):
        self.assertTrue(interval.IsValid())
    self.assertFalse(backwards.IsValid())
def test_cover(self):
    """Covers/CoveredBy for GenomicIntervals, including a different reference name."""
    low = PacBioBam.GenomicInterval("foo", 2, 4)
    mid = PacBioBam.GenomicInterval("foo", 3, 5)
    high = PacBioBam.GenomicInterval("foo", 6, 8)
    wide = PacBioBam.GenomicInterval("foo", 1, 7)
    tail = PacBioBam.GenomicInterval("foo", 5, 8)
    otherRef = PacBioBam.GenomicInterval("bar", 3, 5)  # same coordinates as mid, different ref

    #        0123456789
    # low      --
    # mid       --
    # high         --
    # wide    ------
    # tail        ---

    # an interval covers, and is covered by, itself
    self.assertTrue(low.Covers(low))
    self.assertTrue(low.CoveredBy(low))

    # mid lies inside wide, but not the other way round
    self.assertTrue(mid.CoveredBy(wide))
    self.assertTrue(wide.Covers(mid))
    self.assertNotEqual(mid, wide)
    self.assertFalse(mid.Covers(wide))

    # identical coordinates on another reference never cover each other
    self.assertFalse(otherRef.CoveredBy(wide))
    self.assertFalse(wide.Covers(otherRef))
    self.assertNotEqual(otherRef, wide)
    self.assertFalse(otherRef.Covers(wide))

    # mid and high do not overlap at all
    self.assertFalse(mid.Covers(high))
    self.assertFalse(high.Covers(mid))
    self.assertFalse(mid.CoveredBy(high))
    self.assertFalse(high.CoveredBy(mid))

    # mid.stop == tail.start (intervals are right-open, so disjoint)
    self.assertFalse(mid.Covers(tail))
    self.assertFalse(mid.CoveredBy(tail))

    # high shares tail's stop and starts inside tail
    self.assertTrue(tail.Covers(high))
    self.assertTrue(high.CoveredBy(tail))
def test_fromNumber(self):
    """QualityValue caps numeric input at 93 and maps it to the FASTQ character."""
    # (constructor argument, expected int value, expected FASTQ character)
    cases = (
        (0, 0, '!'),
        (33, 33, 'B'),
        (42, 42, 'K'),
        (93, 93, '~'),
        (94, 93, '~'),     # above the maximum
        (126, 93, '~'),    # far above the maximum
    )

    # construct every value first, then check ints, then FASTQ characters
    qualityValues = [PacBioBam.QualityValue(raw) for raw, _, _ in cases]
    for qv, (_, expectedInt, _) in zip(qualityValues, cases):
        self.assertEqual(expectedInt, int(qv))
    for qv, (_, _, expectedChar) in zip(qualityValues, cases):
        self.assertEqual(expectedChar, qv.Fastq())
#    with the distribution.
#
#  * Neither the name of Pacific Biosciences nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
# GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
# BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Author: Derek Barnett

# Smoke check for the Python wrapper: import the module and construct one
# object from it. Only an ImportError is reported as a wrapper failure; any
# other exception propagates.
try:
    import PacBioBam as bam
    header = bam.BamHeader()
    # single-argument parenthesized print emits the same text on Python 2
    # and is also valid Python 3 syntax
    print("\nPython wrapper OK.\n")
except ImportError:
    print("\nPython wrapper failed!\n")
def test_productionHqToOriginal(self):
    """Stitch the production HQ-region and scraps files and compare with the polymerase record."""
    # build the virtual polymerase record from the two input files
    hqPath = self.data.directory + "/polymerase/production_hq.hqregion.bam"
    scrapsPath = self.data.directory + "/polymerase/production_hq.scraps.bam"
    stitcher = PacBioBam.VirtualPolymeraseReader(hqPath, scrapsPath)
    self.assertTrue(stitcher.HasNext())
    stitched = stitcher.Next()

    # read the first record of the original polymerase file
    polymeraseData = PacBioBam.DataSet(self.data.directory + "/polymerase/production.polymerase.bam")
    query = PacBioBam.EntireFileQuery(polymeraseData)
    cursor = query.begin()
    sentinel = query.end()
    self.assertTrue(cursor != sentinel)
    original = cursor.value()

    # neither record carries pulse calls
    self.assertFalse(original.HasPulseCall())
    self.assertFalse(stitched.HasPulseCall())

    # fields compared directly between the two records
    for getter in ("FullName", "HoleNumber", "NumPasses", "Sequence",
                   "DeletionTag", "SubstitutionTag"):
        self.assertEqual(getattr(original, getter)(), getattr(stitched, getter)())
    # the original's IPD is compared against the stitched record's IPDV1Frames
    self.assertEqual(original.IPD(), stitched.IPDV1Frames())
    self.assertEqual(original.ReadGroup(), stitched.ReadGroup())
    self.assertAlmostEqual(float(original.ReadAccuracy()), float(stitched.ReadAccuracy()))

    # quality-value fields are compared through their FASTQ strings
    for getter in ("Qualities", "DeletionQV", "InsertionQV", "MergeQV", "SubstitutionQV"):
        self.assertEqual(getattr(original, getter)().Fastq(),
                         getattr(stitched, getter)().Fastq())

    # tag presence must match on both records: first the tags expected to
    # be present, then the tags expected to be absent
    presentChecks = ("HasDeletionQV", "HasDeletionTag", "HasInsertionQV", "HasMergeQV",
                     "HasSubstitutionQV", "HasSubstitutionTag", "HasIPD")
    absentChecks = ("HasLabelQV", "HasAltLabelQV", "HasAltLabelTag", "HasPkmean",
                    "HasPkmid", "HasPulseCall", "HasPulseWidth", "HasPrePulseFrames",
                    "HasPulseCallWidth")
    for record in (original, stitched):
        for checkName in presentChecks:
            self.assertTrue(getattr(record, checkName)())
        for checkName in absentChecks:
            self.assertFalse(getattr(record, checkName)())
def test_virtualRegions(self):
    """Verify the region tables of a record stitched from subreads and scraps."""
    subreadPath = self.data.directory + "/polymerase/internal.subreads.bam"
    scrapsPath = self.data.directory + "/polymerase/internal.scraps.bam"
    stitcher = PacBioBam.VirtualPolymeraseReader(subreadPath, scrapsPath)
    stitched = stitcher.Next()

    # NOTE: VirtualRegionsMap() is deliberately not exercised here.
    #
    # Any attempt to retrieve that value resulted in several
    # "swig/python detected a memory leak of type 'unknown', no destructor found."
    # errors (& an empty dictionary result). The same info is available via
    # VirtualRegionsTable(regionType), though a bit clunkier if you just want
    # to iterate. Access to region info for specific types is available &
    # correct, so this one is left out for now. - DB
    #
    # regionMap = virtualRecord.VirtualRegionsMap()

    def checkRegions(regionType, expectedSpans):
        # fetch the table for one region type, check its size, then each
        # region's beginPos and endPos in order
        table = stitched.VirtualRegionsTable(regionType)
        self.assertEqual(len(expectedSpans), len(table))
        for position, (begin, end) in enumerate(expectedSpans):
            self.assertEqual(begin, table[position].beginPos)
            self.assertEqual(end, table[position].endPos)

    # ADAPTER
    checkRegions(PacBioBam.VirtualRegionType_ADAPTER, (
        (3047, 3095),
        (3650, 3700),
        (4289, 4335),
        (4888, 4939),
        (5498, 5546),
        (6116, 6173),
        (6740, 6790),
    ))

    # BARCODE
    checkRegions(PacBioBam.VirtualRegionType_BARCODE, (
        (3025, 3047),
        (3095, 3116),
        (3628, 3650),
        (3700, 3722),
        (4267, 4289),
        (4335, 4356),
        (4864, 4888),
        (4939, 4960),
        (5477, 5498),
        (5546, 5571),
        (6087, 6116),
        (6173, 6199),
        (6719, 6740),
        (6790, 6812),
    ))

    # HQREGION
    checkRegions(PacBioBam.VirtualRegionType_HQREGION, (
        (2659, 7034),
    ))

    # LQREGION
    checkRegions(PacBioBam.VirtualRegionType_LQREGION, (
        (0, 2659),
        (7034, 7035),
    ))

    # SUBREAD - only the count is checked
    subreadTable = stitched.VirtualRegionsTable(PacBioBam.VirtualRegionType_SUBREAD)
    self.assertEqual(8, len(subreadTable))
def test_validity(self):
    """A GenomicInterval is valid only with a non-empty name and start < stop."""
    defaulted = PacBioBam.GenomicInterval()                # default
    namedEmptyAtZero = PacBioBam.GenomicInterval("foo", 0, 0)  # valid id, start == stop (zero)
    namedEmptyAtFour = PacBioBam.GenomicInterval("foo", 4, 4)  # valid id, start == stop (non-zero)
    namedFromZero = PacBioBam.GenomicInterval("foo", 0, 1)     # valid id, start < stop (start == zero)  OK
    namedFromFour = PacBioBam.GenomicInterval("foo", 4, 5)     # valid id, start < stop (start > zero)   OK
    namedBackwards = PacBioBam.GenomicInterval("foo", 5, 4)    # valid id, start > stop
    blankEmptyAtZero = PacBioBam.GenomicInterval("", 0, 0)     # invalid id, start == stop (zero)
    blankEmptyAtFour = PacBioBam.GenomicInterval("", 4, 4)     # invalid id, start == stop (non-zero)
    blankFromZero = PacBioBam.GenomicInterval("", 0, 1)        # invalid id, start < stop (start == zero)
    blankFromFour = PacBioBam.GenomicInterval("", 4, 5)        # invalid id, start < stop (start > zero)
    blankBackwards = PacBioBam.GenomicInterval("", 5, 4)       # invalid id, start > stop

    # the only valid combinations: non-empty name with start < stop
    for interval in (namedFromZero, namedFromFour):
        self.assertTrue(interval.IsValid())

    # everything else is invalid
    invalidIntervals = (
        defaulted,
        namedEmptyAtZero,
        namedEmptyAtFour,
        namedBackwards,
        blankEmptyAtZero,
        blankEmptyAtFour,
        blankFromZero,
        blankFromFour,
        blankBackwards,
    )
    for interval in invalidIntervals:
        self.assertFalse(interval.IsValid())