Exemple #1
0
 def testReadName(self):
     """Check `readName` on the `fwdAln` and `revAln` fixture alignments."""
     expectedFwdName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727"
     EQ(expectedFwdName, self.fwdAln.readName)

     expectedRevName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9561_9619"
     EQ(expectedRevName, self.revAln.readName)
Exemple #2
0
    def testBaxAttaching(self):
        """Check ZMW access on alignments before and after `attach`.

        Before `self.f.attach(self.BAX_FILE)` is called, reading `zmw` or
        `zmwRead` must raise ValueError.  Afterwards the names and reprs of
        the forward alignment are checked, and every alignment's unaligned
        native-orientation read must equal its `zmwRead.basecalls()`.
        """
        # Before attaching, should get sane exceptions
        for attrName in ("zmw", "zmwRead"):
            with assert_raises(ValueError):
                getattr(self.fwdAln, attrName)

        # Now attach
        self.f.attach(self.BAX_FILE)

        expectedReadName = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727'
        EQ(expectedReadName, self.fwdAln.readName)

        expectedZmwName = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957'
        EQ(expectedZmwName, self.fwdAln.zmwName)

        expectedZmwRepr = '<Zmw: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957>'
        EQ(expectedZmwRepr, repr(self.fwdAln.zmw))

        expectedZmwReadRepr = '<ZmwRead: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727>'
        EQ(expectedZmwReadRepr, repr(self.fwdAln.zmwRead))

        # Check read contents, for every aln.
        for record in self.alns:
            EQ(record.read(aligned=False, orientation="native"),
               record.zmwRead.basecalls())
    def testClippedAlignments(self):
        """Check `clippedTo` against hand-written alignment columns.

        Each column is a (reference position, reference base, read base)
        triple built from `referencePositions()`, `reference()` and `read()`.
        Three clipping windows over the same gappy region are verified,
        together with the `referenceStart`/`referenceEnd` they report.
        """
        def columns(aln):
            # zip() returns an iterator on Python 3, which can neither be
            # sliced nor compared equal to a list; materialize it so the
            # assertions behave the same on Python 2 and Python 3.
            return list(zip(aln.referencePositions(),
                            aln.reference(),
                            aln.read()))

        # Get a more interesting (more gappy) fwd strand aln
        a = self.fwdAln
        EQ([(980, 'C', 'C'),
            (981, 'C', 'C'),
            (982, 'T', 'T'),
            (983, 'A', '-'),
            (984, 'C', 'C'),
            (985, '-', 'G'),
            (985, 'T', 'T'),
            (986, 'T', 'T') ],
           columns(a)[308:316])

        ac1 = a.clippedTo(983, 985)
        EQ(983, ac1.referenceStart)
        EQ(985, ac1.referenceEnd)
        EQ([(983, 'A', '-'),
            (984, 'C', 'C')],
           columns(ac1))

        ac2 = a.clippedTo(982, 986)
        EQ(982, ac2.referenceStart)
        EQ(986, ac2.referenceEnd)
        EQ([(982, 'T', 'T'),
            (983, 'A', '-'),
            (984, 'C', 'C'),
            (985, '-', 'G'),
            (985, 'T', 'T')],
           columns(ac2))

        ac3 = a.clippedTo(984, 985)
        EQ(984, ac3.referenceStart)
        EQ(985, ac3.referenceEnd)
        EQ([(984, 'C', 'C')],
           columns(ac3))
Exemple #4
0
    def test_readPositions(self):
        """Check `readPositions()` paired with `read()` in both orientations.

        Alignment 26 of `self._inCmpH5` is used as the forward-strand case and
        `self.hit0` as the reverse-strand case.  Every zip() is wrapped in
        list() because on Python 3 zip() returns an iterator, which never
        compares equal to the expected list.
        """
        # Native orientation on a fwd strand read
        EQ([('A', 44), ('A', 45), ('C', 46), ('T', 47), ('G', 48), ('G', 49),
            ('T', 50), ('-', 51), ('-', 51), ('C', 51)],
           list(zip(self._inCmpH5[26].read()[:10],
                    self._inCmpH5[26].readPositions()[:10])))

        # Genomic orientation on a fwd strand read
        EQ([('A', 44), ('A', 45), ('C', 46), ('T', 47), ('G', 48), ('G', 49),
            ('T', 50), ('-', 51), ('-', 51), ('C', 51)],
           list(zip(self._inCmpH5[26].read(orientation="genomic")[:10],
                    self._inCmpH5[26].readPositions(orientation="genomic")[:10])))

        # Test native orientation on a rev. strand read
        EQ([('T', 295), ('C', 296), ('C', 297), ('G', 298), ('-', 299),
            ('C', 299), ('G', 300), ('C', 301), ('C', 302), ('C', 303)],
           list(zip(self.hit0.read()[-10:],
                    self.hit0.readPositions()[-10:])))

        # Test genomic orientation on a rev. strand read
        EQ([('G', 303), ('G', 302), ('G', 301), ('C', 300), ('G', 299),
            ('-', 298), ('C', 298), ('G', 297), ('G', 296), ('A', 295)],
           list(zip(
               self.hit0.read(orientation="genomic")[:10],
               self.hit0.readPositions(orientation="genomic")[:10])))
 def testErrorCounts(self):
     """Check nM/nMM/nIns/nDel against a tally of each alignment's transcript.

     The transcript codes "M", "R", "I" and "D" are compared with the
     `nM`, `nMM`, `nIns` and `nDel` attributes respectively.
     """
     codeToAttr = (("M", "nM"), ("R", "nMM"), ("I", "nIns"), ("D", "nDel"))
     for record in (self.fwdAln, self.revAln):
         tally = Counter(record.transcript())
         for code, attrName in codeToAttr:
             EQ(tally[code], getattr(record, attrName))
 def testUnalignedReference(self):
     """Check unaligned reference sequences in native and genomic orientation.

     For `fwdAln` both orientations must give the same sequence; for
     `revAln` the genomic orientation must equal `RC` of the native one.
     """
     fwdReference = "GCCGCGCTGGATGAACTGATACCGGGGTTGCTGAGTGAATATATCGAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCCGTTGAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCAGCGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCAAAATGCTGCTGGGTGTTTATGCCTACTTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCC"
     EQ(fwdReference, self.fwdAln.reference(aligned=False))
     EQ(fwdReference,
        self.fwdAln.reference(aligned=False, orientation="genomic"))

     revReference = "TAGCCACCGGATATCCCACAGGTGAGCCGTGTAGTTGAAGGTTTTTACGTCAGATTCTTTTGGGATTGGCTTGGGTTTATTTCTGGTGCGTTTCGTTGGAAGGTATTTGCAGTTTTCGCAGATTATGTCGGTGATACTTCGTCGCTGTCTCGCCACACGTCCTCCTTTTCCTGCGGTAGTGGTAACACCCC"
     EQ(revReference, self.revAln.reference(aligned=False))
     EQ(RC(revReference),
        self.revAln.reference(aligned=False, orientation="genomic"))
Exemple #7
0
 def testReadName(self):
     """Check `readName` on the `fwdAln` and `revAln` fixture alignments."""
     expectedFwdName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/32328/1_344"
     EQ(expectedFwdName, self.fwdAln.readName)

     expectedRevName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/51534/1_200"
     EQ(expectedRevName, self.revAln.readName)
Exemple #8
0
 def test_reads_in_range_bounds(self):
     """Check `readsInRange` hit counts at window boundaries on reference 1."""
     reader = self._inCmpH5

     # Window [0, 1): exactly two hits, both starting at template position 0.
     EQ(len(reader.readsInRange(1, 0, 1)), 2)
     startsAtZero = [hit.tStart == 0 for hit in reader.readsInRange(1, 0, 1)]
     EQ(all(startsAtZero), True)

     # Moving the window end from 1051 to 1052 picks up one hit.
     EQ(len(reader.readsInRange(1, 1000, 1051)), 0)
     EQ(len(reader.readsInRange(1, 1000, 1052)), 1)

     # With a huge window end the hit count must equal len(reader).
     EQ(len(reader.readsInRange(1, 0, 1e20)), len(reader))
Exemple #9
0
    def test_referencePositions(self):
        """Check `referencePositions()` paired with `reference()`.

        Alignment 26 of `self._inCmpH5` is used as the forward-strand case and
        alignment 0 as the reverse-strand case, each in native and genomic
        orientation.  Every zip() is wrapped in list() because on Python 3
        zip() returns an iterator, which never compares equal to the
        expected list.
        """
        # Native orientation on a fwd strand read
        EQ([('G', 7466), ('A', 7467), ('A', 7468), ('G', 7469), ('-', 7470),
            ('C', 7470), ('T', 7471), ('-', 7472), ('G', 7472), ('C', 7473)],
           list(zip(self._inCmpH5[26].reference()[25:35],
                    self._inCmpH5[26].referencePositions()[25:35])))

        # Genomic orientation on a fwd strand read
        EQ([('G', 7466), ('A', 7467), ('A', 7468), ('G', 7469), ('-', 7470),
            ('C', 7470), ('T', 7471), ('-', 7472), ('G', 7472), ('C', 7473)],
           list(zip(
               self._inCmpH5[26].reference(orientation="genomic")[25:35],
               self._inCmpH5[26].referencePositions(
                   orientation="genomic")[25:35])))

        # Test native orientation on a rev. strand read
        EQ([('T', 8), ('C', 7), ('-', 6), ('G', 6), ('C', 5), ('C', 4),
            ('G', 3), ('C', 2), ('C', 1), ('C', 0)],
           list(zip(self._inCmpH5[0].reference()[-10:],
                    self._inCmpH5[0].referencePositions()[-10:])))

        # Test genomic orientation on a rev. strand read
        EQ([('G', 0), ('G', 1), ('G', 2), ('C', 3), ('G', 4), ('G', 5),
            ('C', 6), ('-', 7), ('G', 7), ('A', 8)],
           list(zip(self._inCmpH5[0].reference(orientation="genomic")[:10],
                    self._inCmpH5[0].referencePositions(
                        orientation="genomic")[:10])))
Exemple #10
0
 def testReadGroupTable(self):
     """Check the dtype and selected fields of `fwdAln.readGroupInfo`."""
     readGroup = self.fwdAln.readGroupInfo

     expectedDtype = [
         ('ID', '<i4'),
         ('MovieName', 'O'),
         ('ReadType', 'O'),
         ('SequencingChemistry', 'O'),
         ('FrameRate', '<f8'),
     ]
     EQ(expectedDtype, readGroup.dtype)

     EQ("P6-C4", readGroup.SequencingChemistry)
     EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0",
        readGroup.MovieName)
Exemple #11
0
    def testTranscript(self):
        """Check the full `transcript()` strings of `fwdAln` and `revAln`."""
        expectedFwd = 'MMMMMMRMDMMMMIIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMDMMMMMMMMMMMMMMRMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMDMIMMMMMMMMMMMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMM'
        EQ(expectedFwd, self.fwdAln.transcript())

        expectedRev = "MMMMMMMMMMMMMMMIMMMMMMMMMMIMMMMMMMIMMMMMDMMMIMMMMIMMMMMMMMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMIMMMMMMMMMMMMMMMDMMMMMMMMMMMMMMMMMMMMMMMMMMMMMIIMIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM"
        EQ(expectedRev, self.revAln.transcript())
Exemple #12
0
 def testUnalignedRead(self):
     """Check unaligned read sequences in native and genomic orientation.

     For `fwdAln` both orientations must give the same sequence; for
     `revAln` the genomic orientation must equal `RC` of the native one.
     """
     fwdBases = "TACGGTCATCATCTGACACTACAGACTCTGGCATCGCTGTGAAGAC"
     fwd = self.fwdAln
     EQ(fwdBases, fwd.read(aligned=False))
     EQ(fwdBases, fwd.read(aligned=False, orientation="genomic"))

     revBases = "CTTGTGAAAATGCTGAATTCTGCGTCGCTTCACCAGCGATGCCAAGTCTGTAGTGTCA"
     rev = self.revAln
     EQ(revBases, rev.read(aligned=False))
     EQ(RC(revBases), rev.read(aligned=False, orientation="genomic"))
Exemple #13
0
 def test_load_updated_mapping(self):
     """Check that `_loadBarcodeMappings` honours SMRT_CHEMISTRY_BUNDLE_DIR.

     With the variable pointing at the directory of `data.getMappingXml()`,
     the key ("1", "2", "3.4") must map to "FOUND"; with the variable unset
     the key must be absent.

     The variable's previous state is restored in a `finally` clause so that
     a failing assertion cannot leak the override into other tests, and a
     value set by the caller's environment is not lost.
     """
     import os
     from os.path import dirname
     from pbcore.chemistry.chemistry import _loadBarcodeMappings

     envVar = "SMRT_CHEMISTRY_BUNDLE_DIR"
     probeKey = ("1", "2", "3.4")
     previous = os.environ.get(envVar)
     try:
         os.environ[envVar] = dirname(data.getMappingXml())
         mappings = _loadBarcodeMappings()
         EQ(mappings.get(probeKey, None), "FOUND")

         del os.environ[envVar]
         mappings = _loadBarcodeMappings()
         EQ(mappings.get(probeKey, None), None)
     finally:
         # Put the environment back exactly as it was found.
         if previous is None:
             os.environ.pop(envVar, None)
         else:
             os.environ[envVar] = previous
Exemple #14
0
 def testHoleNumbers(self):
     """Check per-hole alignment counts, from records and from the index.

     The tally of `holeNumber` over the iterated records and the tally of
     the reader-level `holeNumber` values must both equal the expected
     table below.
     """
     perRecord = Counter(aln.holeNumber for aln in self.f)  # from records
     perIndex = Counter(self.f.holeNumber)  # from index

     expected = Counter({
         37134: 14, 6251: 10, 32861: 8, 14743: 4,
         # holes with three alignments
         35858: 3, 39571: 3, 13473: 3, 32560: 3,
         46835: 3, 47698: 3, 16996: 3,
         # holes with two alignments
         30983: 2, 38025: 2, 36363: 2, 7957: 2,
         49050: 2, 23454: 2, 49194: 2, 24494: 2,
         20211: 2, 50621: 2, 12736: 2, 19915: 2,
         6469: 2, 31174: 2, 32328: 2, 42827: 2,
         7247: 2, 50257: 2, 2771: 2, 1650: 2,
         45203: 2,
         # holes with a single alignment
         24962: 1, 32901: 1, 36628: 1, 26262: 1,
         15641: 1, 19360: 1, 42165: 1, 44356: 1,
         51534: 1, 29843: 1, 38754: 1, 52206: 1,
         49521: 1, 7670: 1, 54396: 1, 19837: 1,
     })

     for observed in (perRecord, perIndex):
         EQ(expected, observed)
    def testReadsByName(self):
        """Check that three query spellings for hole 2771 give the same reads.

        The queries end in "/2771/*", "/2771" and "/2771/"; each must return
        the same two read names in the same order.
        """
        queries = [
            "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/*",
            "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771",
            "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/",
        ]
        # Run every lookup before asserting anything.
        resultSets = [self.f.readsByName(query) for query in queries]

        expectedReadNames = [
            "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/8741_8874",
            "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/8942_9480",
        ]

        for reads in resultSets:
            EQ(expectedReadNames, [r.readName for r in reads])
Exemple #16
0
    def test_cigar(self):
        """Check `cigar()` of alignment 0 in default and genomic orientation."""
        # Adjacent literals are concatenated by the parser; the pieces are
        # split only to keep the lines short.
        expectedDefault = (
            "6M2D12M1I10M1D21M1I2M2I7M1I2M1I10M1I4M1I11M1D1I4M1I1M1I36M1I4M2I1M"
            "1D2M1I9M1I15M1I9M1I4M9D9M1I2M1D16M1I20M1D4M1D8M3I12M1I2M2I7M1I4M1I"
            "1M1D4M1I6M1I1M1D5M"
        )
        EQ(expectedDefault, self._inCmpH5[0].cigar())

        expectedGenomic = (
            "5M1D1M1I6M1I4M1D1M1I4M1I7M2I2M1I12M3I8M1D4M1D20M1I16M1D2M1I9M9D4M1"
            "I9M1I15M1I9M1I2M1D1M2I4M1I36M1I1M1I4M1I1D11M1I4M1I10M1I2M1I7M2I2M1"
            "I21M1D10M1I12M2D6M"
        )
        EQ(expectedGenomic, self._inCmpH5[0].cigar(orientation="genomic"))
def test_diploid_variantsFromAlignment():
    """Check `variantsFromAlignment` against the reference "GATTACA".

    Covers an identical consensus, substitutions, an "N" in the consensus,
    a deletion, an insertion, and consensus strings containing "W", for
    which the expected `Variant` carries a third sequence argument.
    """
    refWin = (0, 10, 17)
    refSeq = "GATTACA"

    def called(cssSeq):
        # Variants for one consensus string against the shared reference.
        return variantsFromAlignment(refWin, refSeq, cssSeq)

    EQ([], called("GATTACA"))

    EQ([Variant(0, 13, 14, "T", "G")], called("GATGACA"))

    EQ([Variant(0, 12, 14, "TT", "GG")], called("GAGGACA"))

    EQ([Variant(0, 12, 13, "T", "G"), Variant(0, 14, 15, "A", "G")],
       called("GAGNGCA"))

    EQ([Variant(0, 15, 16, "C", "")], called("GATTAA"))

    EQ([Variant(0, 12, 12, "", "T")], called("GATTTACA"))

    EQ([Variant(0, 13, 14, "T", "A", "T")], called("GATWACA"))

    EQ([Variant(0, 12, 13, "T", "A", "T"), Variant(0, 13, 14, "T", "A", "T")],
       called("GAWWACA"))
 def test_retrieve_read_group_properties(self):
     """Check read-group derived properties on a BAM converted from SAM.

     `self.SAM_IN` is written to a SAM file and copied record by record
     into a BAM file with pysam.  The BAM is then read with `BamReader`;
     every record must report chemistry "P6-C4", and the movie names must
     match the expected list in order.

     Both files live in a private temporary directory that is removed in a
     `finally` clause.  Taking `.name` from a discarded NamedTemporaryFile
     would reuse a path after its file was deleted and would leave the
     recreated files behind.
     """
     import os
     import shutil

     workDir = tempfile.mkdtemp()
     try:
         f1 = os.path.join(workDir, "input.sam")
         f2 = os.path.join(workDir, "output.bam")
         with open(f1, "w") as f:
             f.write(self.SAM_IN)
         with pysam.AlignmentFile(f1) as sam_in:
             with pysam.AlignmentFile(f2, 'wb', template=sam_in) as bam_out:
                 for aln in sam_in:
                     bam_out.write(aln)
         movie_names = []
         with BamReader(f2) as bam_in:
             for aln in bam_in:
                 EQ(aln.sequencingChemistry, "P6-C4")
                 movie_names.append(aln.movieName)
         EQ(movie_names, ['movie1', 'm140906_231018_42161_c100676332550000001823129611271486_s1_p0'])
     finally:
         shutil.rmtree(workDir, ignore_errors=True)
Exemple #19
0
 def test_empty_bam_reads_in_range(self):
     """Check that `readsInRange` on the empty aligned BAM returns nothing."""
     emptyBamPath = data.getEmptyAlignedBam()
     with IndexedBamReader(emptyBamPath) as emptyBam:
         hits = emptyBam.readsInRange(
             "lambda_NEB3011", 0, 50000, justIndices=True)
         EQ(len(hits), 0)
Exemple #20
0
 def testNoCallBasesInReference1(self):
     """Check the variants called when the first sequence contains an "N"."""
     refWindow = (1, 1000, 2000)
     alignment = PairwiseAlignment("GATTNGATT", "GAGGATATT")
     observed = utils.variantsFromAlignment(alignment, refWindow)

     expected = [
         Variant(1, 1002, 1004, "TT", "GG", refPrev="A", readPrev="A"),
         Variant(1, 1005, 1006, "G", "T", refPrev="N", readPrev="A"),
     ]
     EQ(expected, observed)
Exemple #21
0
 def testTwoSubstitutions(self):
     """Check that two separated mismatches give two substitution variants."""
     refWindow = (1, 1000, 2000)
     alignment = PairwiseAlignment("GATTACA", "GAGTAGA")
     observed = utils.variantsFromAlignment(alignment, refWindow)

     expected = [
         Variant(1, 1002, 1003, "T", "G", refPrev="A", readPrev="A"),
         Variant(1, 1005, 1006, "C", "G", refPrev="A", readPrev="A"),
     ]
     EQ(expected, observed)
def test_algorithm_selection():
    """Check the value `bestAlgorithm_` returns for several chemistry lists."""
    cases = [
        ("quiver", ["P6-C4"]),
        ("quiver", ["P6-C4", "P5-C3"]),
        ("arrow", ["S/P1-C1/beta"]),
        ("arrow", ["P6-C4", "S/P1-C1/beta"]),
        (None, ["P6-C4", "unknown"]),
        ("arrow", ["S/P1-C1"]),
        ("arrow", ["P6-C4", "S/P1-C1.1"]),
        # (Arrow pres. no training for P5.  But it will tell us that)
        ("arrow", ["P5-C3", "S/P1-C1.1"]),
    ]
    for expectedAlgorithm, chemistries in cases:
        EQ(expectedAlgorithm, bestAlgorithm_(chemistries))
 def testClippingsVsBaxData(self):
     """Check clipped alignments against basecalls from the attached BAX file.

     For `fwdAln` and `revAln`, every clip start `cS` from `tStart` to
     `tEnd` is combined with clip ends from `cS + 1` up to, but excluding,
     `min(tEnd, cS + 10)`.  The clipped alignment's `zmwRead.basecalls()`
     must equal its unaligned native-orientation read.

     `range` is used instead of `xrange`, which does not exist on Python 3.
     """
     self.f.attach(self.BAX_FILE)
     for aln in [self.fwdAln, self.revAln]:
         for cS in range(aln.tStart, aln.tEnd + 1):
             for cE in range(cS + 1, min(aln.tEnd, cS + 10)):
                 ca = aln.clippedTo(cS, cE)
                 EQ(ca.zmwRead.basecalls(),
                    ca.read(aligned=False, orientation="native"))
 def testReadsInRange(self):
     """Check `readsInRange` against a brute-force scan of `self.alns`.

     Reference "lambda_NEB3011" is walked in 1000-base windows from 0 up to
     50000.  For each window the read names returned by `readsInRange` must
     equal the names of the alignments on that reference for which
     `overlapsReferenceRange(wStart, wEnd)` is true.

     `range` is used instead of `xrange`, which does not exist on Python 3.
     """
     wLen = 1000
     for wStart in range(0, 50000, wLen):
         wEnd = wStart + wLen
         expectedNames = set([ a.readName for a in self.alns
                               if (a.referenceName == "lambda_NEB3011" and
                                   a.overlapsReferenceRange(wStart, wEnd)) ])
         EQ(expectedNames,
            set([ a.readName for a in self.f.readsInRange("lambda_NEB3011", wStart, wEnd) ]))
Exemple #25
0
 def test_alignment_identity(self):
     """
     Check that the values of the 'identity' property are consistent
     between IndexedBamReader (numpy array) and BamAlignment (float)
     """
     bamPath = data.getBamAndCmpH5()[0]
     with IndexedBamReader(bamPath) as reader:
         fromReader = reader.identity
         fromRecords = np.array([record.identity for record in reader])
         allEqual = (fromRecords == fromReader).all()
         EQ(allEqual, True)
Exemple #26
0
 def testIpd(self):
     """Check that 'Ipd' feature is recognized correctly."""
     pfa = self.bam.pulseFeaturesAvailable()
     EQ(
         pfa,
         frozenset([
             'Ipd', 'DeletionTag', 'MergeQV', 'SubstitutionQV',
             'InsertionQV', 'DeletionQV'
         ]))
     # The returned values are not inspected; the call only has to complete
     # without raising, so the result is not bound to an unused local.
     self.bamRead0.IPD(aligned=False, orientation="native")
Exemple #27
0
 def test_alignment_identity_unindexed(self):
     """
     Check that the value of the 'identity' property is the same whether
     or not the .pbi index was used to calculate it.

     The BAM is copied into a private temporary directory and opened there
     with BamReader.  The directory is removed in a `finally` clause so the
     copy is not left behind.
     """
     import os

     fn1 = data.getBamAndCmpH5()[0]
     workDir = tempfile.mkdtemp()
     try:
         fn2 = os.path.join(workDir, "noindex.bam")
         shutil.copyfile(fn1, fn2)
         with IndexedBamReader(fn1) as bam_pbi:
             with BamReader(fn2) as bam_noindex:
                 i1 = np.array([rec.identity for rec in bam_pbi])
                 i2 = np.array([rec.identity for rec in bam_noindex])
                 EQ((i2 == i1).all(), True)
     finally:
         shutil.rmtree(workDir, ignore_errors=True)
Exemple #28
0
 def testVariantsFromAlignment4(self):
     """Check the variant called for a gap in the middle of the first sequence.

     The per-column QV list is 1 only at the gap column, and the expected
     variant carries confidence 1.
     """
     refWindow = (1, 1000, 2000)
     alignment = PairwiseAlignment("GA-TACA", "GATTACA")
     columnQvs = [0, 0, 1, 0, 0, 0, 0]
     observed = utils.variantsFromAlignment(alignment, refWindow, columnQvs)

     insertion = Variant(1, 1002, 1002, "", "T",
                         confidence=1, refPrev="A", readPrev="A")
     EQ([insertion], observed)
Exemple #29
0
 def testVariantsFromAlignment5(self):
     """Check the variant called for a gap in the first column.

     The per-column QV list is 1 only at the gap column; the expected
     variant carries confidence 1 and "N" for both previous bases.
     """
     refWindow = (1, 1000, 2000)
     alignment = PairwiseAlignment("-ATTACA", "GATTACA")
     columnQvs = [1, 0, 0, 0, 0, 0, 0]
     observed = utils.variantsFromAlignment(alignment, refWindow, columnQvs)

     insertion = Variant(1, 1000, 1000, "", "G",
                         confidence=1, refPrev="N", readPrev="N")
     EQ([insertion], observed)
Exemple #30
0
 def testVariantsFromAlignment6(self):
     """Check the variant called for a gap in the last column.

     The per-column QV list is 1 only at the gap column, and the expected
     variant carries confidence 1.
     """
     refWindow = (1, 1000, 2000)
     alignment = PairwiseAlignment("GATTAC-", "GATTACA")
     columnQvs = [0, 0, 0, 0, 0, 0, 1]
     observed = utils.variantsFromAlignment(alignment, refWindow, columnQvs)

     insertion = Variant(1, 1006, 1006, "", "A",
                         confidence=1, refPrev="C", readPrev="C")
     EQ([insertion], observed)