def test_split_zmws_targetsize(self):
    """Split the unaligned BAM by ZMW using several ``targetSize`` values.

    For every target size the test checks the number of chunks produced,
    that the chunks together still hold every record (both by iterating
    and by ``len``), and the sorted count of distinct hole numbers per
    chunk.
    """
    n_records = 117
    n_zmws = 48
    ds1 = openDataFile(upstreamdata.getUnalignedBam())
    assert sum(1 for _ in ds1) == n_records
    assert len(ds1) == n_records
    assert len(set(ds1.index.holeNumber)) == n_zmws

    # (targetSize, expected chunk count, expected sorted ZMWs per chunk)
    cases = (
        (1000, 1, [48]),  # with no split
        (25, 2, [24, 24]),  # with a split
        (5, 10, [4, 4, 5, 5, 5, 5, 5, 5, 5, 5]),  # with a split
    )
    for target_size, n_chunks, zmws_per_chunk in cases:
        chunks = list(ds1.split(targetSize=target_size, zmws=True))
        assert len(chunks) == n_chunks
        assert sum(sum(1 for _ in chunk) for chunk in chunks) == n_records
        assert sum(len(chunk) for chunk in chunks) == n_records
        observed = sorted(
            len(set(chunk.index.holeNumber)) for chunk in chunks)
        assert zmws_per_chunk == observed
Example #2
0
 def test_subreadset_from_bam(self):
     """Build a non-strict SubreadSet from the unaligned BAM and write it out.

     The test passes as long as construction and ``write`` do not raise.
     """
     # DONE control experiment for bug 28698
     bam = upstreamData.getUnalignedBam()
     ds1 = SubreadSet(bam, strict=False)
     # Hold the temporary file open for the duration of the write: taking
     # only ``.name`` from a discarded NamedTemporaryFile deletes the file
     # immediately, reuses the path unguarded, and leaves the written XML
     # behind after the test.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         log.debug(tmp.name)
         ds1.write(tmp.name)
 def test_subreadset_from_bam(self):
     """Build a non-strict SubreadSet from the unaligned BAM and write it out.

     The test passes as long as construction and ``write`` do not raise.
     """
     # DONE control experiment for bug 28698
     bam = upstreamData.getUnalignedBam()
     ds1 = SubreadSet(bam, strict=False)
     # Hold the temporary file open for the duration of the write: taking
     # only ``.name`` from a discarded NamedTemporaryFile deletes the file
     # immediately, reuses the path unguarded, and leaves the written XML
     # behind after the test.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         log.debug(tmp.name)
         ds1.write(tmp.name)
 def test_get_dataset_uuid(self):
     """Check ``getDataSetUuid`` on a written SubreadSet and on junk text.

     The UUID read back from the written XML must equal ``ds.uuid``; after
     the file is overwritten with plain text the lookup must yield ``None``.
     """
     ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     # Keep the temporary file alive (and cleaned up afterwards) instead of
     # borrowing the name of an already-deleted NamedTemporaryFile.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         ds_file = tmp.name
         ds.write(ds_file)
         self.assertEqual(getDataSetUuid(ds_file), ds.uuid)
         # Replace the dataset XML with text that is not a dataset.
         with open(ds_file, "w") as out:
             out.write("hello world!")
         self.assertIsNone(getDataSetUuid(ds_file))
Example #5
0
 def test_get_dataset_uuid(self):
     """Check ``getDataSetUuid`` on a written SubreadSet and on junk text.

     The UUID read back from the written XML must equal ``ds.uuid``; after
     the file is overwritten with plain text the lookup must yield ``None``.
     """
     ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     # Keep the temporary file alive (and cleaned up afterwards) instead of
     # borrowing the name of an already-deleted NamedTemporaryFile.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         ds_file = tmp.name
         ds.write(ds_file)
         assert getDataSetUuid(ds_file) == ds.uuid
         # Replace the dataset XML with text that is not a dataset.
         with open(ds_file, "w") as out:
             out.write("hello world!")
         assert getDataSetUuid(ds_file) is None
    def test_split_zmws_targetsize(self):
        """Split the unaligned BAM by ZMW using several ``targetSize`` values.

        For every target size the test checks the number of chunks produced,
        that the chunks together still hold every record (both by iterating
        and by ``len``), and the sorted count of distinct hole numbers per
        chunk.
        """
        n_records = 117
        n_zmws = 48
        ds1 = openDataFile(upstreamdata.getUnalignedBam())
        self.assertEqual(sum(1 for _ in ds1), n_records)
        self.assertEqual(len(ds1), n_records)
        self.assertEqual(len(set(ds1.index.holeNumber)), n_zmws)

        # (targetSize, expected chunk count, expected sorted ZMWs per chunk)
        cases = (
            (1000, 1, [48]),  # with no split
            (25, 2, [24, 24]),  # with a split
            (5, 10, [4, 4, 5, 5, 5, 5, 5, 5, 5, 5]),  # with a split
        )
        for target_size, n_chunks, zmws_per_chunk in cases:
            chunks = ds1.split(targetSize=target_size, zmws=True)
            self.assertEqual(len(chunks), n_chunks)
            iterated = sum(sum(1 for _ in chunk) for chunk in chunks)
            self.assertEqual(iterated, n_records)
            reported = sum(len(chunk) for chunk in chunks)
            self.assertEqual(reported, n_records)
            observed = sorted(
                len(set(chunk.index.holeNumber)) for chunk in chunks)
            self.assertListEqual(zmws_per_chunk, observed)
Example #7
0
    def test_split_zmws(self):
        """Split the unaligned BAM by ZMW into a requested number of chunks.

        Checks that record totals survive the split, that asking for 12
        chunks yields only 2 with the expected ``zmwRanges``, and that no
        hole number in the source index falls strictly between two
        consecutive chunk ranges.
        """
        n_records = 117
        movie = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0'
        ds1 = openDataFile(upstreamdata.getUnalignedBam())
        self.assertEqual(sum(1 for _ in ds1), n_records)
        self.assertEqual(len(ds1), n_records)

        # (requested chunks, chunks actually produced)
        # We have a lower limit on the number of zmws, now
        for requested, produced in ((1, 1), (12, 2)):
            dss = ds1.split(chunks=requested, zmws=True)
            self.assertEqual(len(dss), produced)
            iterated = sum(sum(1 for _ in chunk) for chunk in dss)
            self.assertEqual(iterated, n_records)
            self.assertEqual(sum(len(chunk) for chunk in dss), n_records)

        # The remaining checks apply to the two-chunk split from above.
        self.assertEqual(dss[0].zmwRanges, [(movie, 1650, 32328)])
        self.assertEqual(dss[-1].zmwRanges, [(movie, 32560, 54396)])

        spans = sorted(chunk.zmwRanges[0][1:] for chunk in dss)
        gaps = []
        for earlier, later in zip(spans, spans[1:]):
            gaps.append((earlier[1], later[0]))
            # Adjacent ranges must not share a boundary hole number.
            self.assertFalse(earlier[1] == later[0])

        for gap in gaps:
            in_gap = np.logical_and(ds1.index.holeNumber < gap[1],
                                    ds1.index.holeNumber > gap[0])
            self.assertEqual(len(np.nonzero(in_gap)[0]), 0)
    def test_split_zmws(self):
        """Split the unaligned BAM by ZMW into a requested number of chunks.

        Checks that record totals survive the split, that asking for 12
        chunks yields only 2 with the expected ``zmwRanges``, and that no
        hole number in the source index falls strictly between two
        consecutive chunk ranges.
        """
        n_records = 117
        movie = 'm140905_042212_sidney_c100564852550000001823085912221377_s1_X0'
        ds1 = openDataFile(upstreamdata.getUnalignedBam())
        self.assertEqual(sum(1 for _ in ds1), n_records)
        self.assertEqual(len(ds1), n_records)

        # (requested chunks, chunks actually produced)
        # We have a lower limit on the number of zmws, now
        for requested, produced in ((1, 1), (12, 2)):
            dss = ds1.split(chunks=requested, zmws=True)
            self.assertEqual(len(dss), produced)
            iterated = sum(sum(1 for _ in chunk) for chunk in dss)
            self.assertEqual(iterated, n_records)
            self.assertEqual(sum(len(chunk) for chunk in dss), n_records)

        # The remaining checks apply to the two-chunk split from above.
        self.assertEqual(dss[0].zmwRanges, [(movie, 1650, 32328)])
        self.assertEqual(dss[-1].zmwRanges, [(movie, 32560, 54396)])

        spans = sorted(chunk.zmwRanges[0][1:] for chunk in dss)
        gaps = []
        for earlier, later in zip(spans, spans[1:]):
            gaps.append((earlier[1], later[0]))
            # Adjacent ranges must not share a boundary hole number.
            self.assertFalse(earlier[1] == later[0])

        for gap in gaps:
            in_gap = np.logical_and(ds1.index.holeNumber < gap[1],
                                    ds1.index.holeNumber > gap[0])
            self.assertEqual(len(np.nonzero(in_gap)[0]), 0)
Example #9
0
    def test_split_zmws_targetsize(self):
        """Split the unaligned BAM by ZMW using several ``targetSize`` values.

        For every target size the test checks the number of chunks produced,
        that the chunks together still hold every record (both by iterating
        and by ``len``), and the sorted count of distinct hole numbers per
        chunk.
        """
        n_records = 117
        n_zmws = 48
        ds1 = openDataFile(upstreamdata.getUnalignedBam())
        self.assertEqual(sum(1 for _ in ds1), n_records)
        self.assertEqual(len(ds1), n_records)
        self.assertEqual(len(set(ds1.index.holeNumber)), n_zmws)

        # (targetSize, expected chunk count, expected sorted ZMWs per chunk)
        cases = (
            (1000, 1, [48]),  # with no split
            (25, 2, [24, 24]),  # with a split
            (5, 10, [4, 4, 5, 5, 5, 5, 5, 5, 5, 5]),  # with a split
        )
        for target_size, n_chunks, zmws_per_chunk in cases:
            chunks = ds1.split(targetSize=target_size, zmws=True)
            self.assertEqual(len(chunks), n_chunks)
            iterated = sum(sum(1 for _ in chunk) for chunk in chunks)
            self.assertEqual(iterated, n_records)
            reported = sum(len(chunk) for chunk in chunks)
            self.assertEqual(reported, n_records)
            observed = sorted(
                len(set(chunk.index.holeNumber)) for chunk in chunks)
            self.assertListEqual(zmws_per_chunk, observed)
Example #10
0
    def __init__(self):
        """Open the BAM and bax.h5 readers and cache the first read of each."""
        bam_reader = BamReader(data.getUnalignedBam())
        bax_reader = BaxH5Reader(data.getBaxForBam())
        self.bam = bam_reader
        self.bax = bax_reader

        # First subread from the bax reader, first record from the BAM reader.
        self.baxRead0 = next(bax_reader.subreads())
        self.bamRead0 = next(iter(bam_reader))
Example #11
0
 def test_get_dataset_metatype(self):
     """Check that a written SubreadSet reports the SubreadSet meta-type."""
     ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     # Keep the temporary file alive (and cleaned up afterwards) instead of
     # borrowing the name of an already-deleted NamedTemporaryFile.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         ds.write(tmp.name)
         meta_type = getDataSetMetaType(tmp.name)
     assert meta_type == "PacBio.DataSet.SubreadSet"
Example #12
0
    def test_context_filters(self):
        """Exercise ``cx`` filter requirements on the unaligned-BAM SubreadSet.

        Each case adds one or more ``cx`` requirements, checks the filtered
        index length, removes the ``cx`` requirement again and checks that
        the index is back to the full 117 records.
        """
        total = 117
        ss = SubreadSet(upstreamdata.getUnalignedBam())
        self.assertEqual(set(ss.index.contextFlag), {0, 1, 2, 3})
        flags = sorted(set(ss.index.contextFlag))
        per_flag = [len(np.flatnonzero(ss.index.contextFlag == flag))
                    for flag in flags]
        self.assertEqual(per_flag, [15, 33, 32, 37])
        self.assertEqual(len(ss.index), total)

        # (requirements added one addRequirement call each, expected length)
        cases = [
            # no adapters/barcodes
            ([[('=', 0)]], 15),
            # no adapters/barcodes
            ([[('=', 'NO_LOCAL_CONTEXT')]], 15),
            # some adapters/barcodes
            ([[('!=', 0)]], 102),
            # adapter before
            ([[('&', 1)]], 70),
            # adapter before
            ([[('&', 'ADAPTER_BEFORE')]], 70),
            # adapter after
            ([[('&', 2)]], 69),
            # adapter before or after
            ([[('&', 3)]], 102),
            # adapter before or after
            ([[('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')]], 102),
            # adapter before or after but not both
            ([[('!=', 0)], [('~', 1), ('~', 2)]], 65),
            # adapter before or after
            ([[('&', 1), ('&', 2)]], 102),
            # adapter before and after
            ([[('&', 1)], [('&', 2)]], 37),
            # adapter before but not after
            ([[('&', 1)], [('~', 2)]], 33),
            # no adapter before
            ([[('~', 1)]], 47),
            # no adapter before or after
            ([[('~', 1)], [('~', 2)]], 15),
            # no adapter before or after
            ([[('~', 3)]], 15),
        ]
        for requirements, expected in cases:
            for requirement in requirements:
                ss.filters.addRequirement(cx=requirement)
            self.assertEqual(len(ss.index), expected)
            ss.filters.removeRequirement('cx')
            self.assertEqual(len(ss.index), total)
    def test_context_filters(self):
        """Exercise ``cx`` filter requirements on the unaligned-BAM SubreadSet.

        Each case adds one or more ``cx`` requirements, checks the filtered
        index length, removes the ``cx`` requirement again and checks that
        the index is back to the full 117 records.
        """
        total = 117
        ss = SubreadSet(upstreamdata.getUnalignedBam())
        assert set(ss.index.contextFlag) == {0, 1, 2, 3}
        flags = sorted(set(ss.index.contextFlag))
        per_flag = [len(np.flatnonzero(ss.index.contextFlag == flag))
                    for flag in flags]
        assert per_flag == [15, 33, 32, 37]
        assert len(ss.index) == total

        # (requirements added one addRequirement call each, expected length)
        cases = [
            # no adapters/barcodes
            ([[('=', 0)]], 15),
            # no adapters/barcodes
            ([[('=', 'NO_LOCAL_CONTEXT')]], 15),
            # some adapters/barcodes
            ([[('!=', 0)]], 102),
            # adapter before
            ([[('&', 1)]], 70),
            # adapter before
            ([[('&', 'ADAPTER_BEFORE')]], 70),
            # adapter after
            ([[('&', 2)]], 69),
            # adapter before or after
            ([[('&', 3)]], 102),
            # adapter before or after
            ([[('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')]], 102),
            # adapter before or after but not both
            ([[('!=', 0)], [('~', 1), ('~', 2)]], 65),
            # adapter before or after
            ([[('&', 1), ('&', 2)]], 102),
            # adapter before and after
            ([[('&', 1)], [('&', 2)]], 37),
            # adapter before but not after
            ([[('&', 1)], [('~', 2)]], 33),
            # no adapter before
            ([[('~', 1)]], 47),
            # no adapter before or after
            ([[('~', 1)], [('~', 2)]], 15),
            # no adapter before or after
            ([[('~', 3)]], 15),
        ]
        for requirements, expected in cases:
            for requirement in requirements:
                ss.filters.addRequirement(cx=requirement)
            assert len(ss.index) == expected
            ss.filters.removeRequirement('cx')
            assert len(ss.index) == total
 def setup_class(cls):
     """Open the unaligned BAM once and cache its first record on the class."""
     reader = BamReader(data.getUnalignedBam())
     cls.bam = reader
     cls.bamRead0 = next(iter(reader))
 def test_get_dataset_metatype(self):
     """Check that a written SubreadSet reports the SubreadSet meta-type."""
     ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     # Keep the temporary file alive (and cleaned up afterwards) instead of
     # borrowing the name of an already-deleted NamedTemporaryFile.
     with tempfile.NamedTemporaryFile(suffix=".subreadset.xml") as tmp:
         ds.write(tmp.name)
         meta_type = getDataSetMetaType(tmp.name)
     self.assertEqual(meta_type, "PacBio.DataSet.SubreadSet")
Example #16
0
 def __init__(self):
     """Open the stitcher and bas.h5 readers and cache item 1650 of each."""
     stitcher = ZmwReadStitcher(getUnalignedBam())
     bas_reader = BasH5Reader(getBaxForBam())
     self.V = stitcher
     self.B = bas_reader
     # The same key is looked up in both readers so tests can compare them.
     self.VZ = stitcher[1650]
     self.BZ = bas_reader[1650]
    def test_context_filters(self):
        """Exercise ``cx`` filter requirements on the unaligned-BAM SubreadSet.

        Each case adds one or more ``cx`` requirements, checks the filtered
        index length, removes the ``cx`` requirement again and checks that
        the index is back to the full 117 records.
        """
        total = 117
        ss = SubreadSet(upstreamdata.getUnalignedBam())
        self.assertEqual(set(ss.index.contextFlag), {0, 1, 2, 3})
        flags = sorted(set(ss.index.contextFlag))
        per_flag = [len(np.flatnonzero(ss.index.contextFlag == flag))
                    for flag in flags]
        self.assertEqual(per_flag, [15, 33, 32, 37])
        self.assertEqual(len(ss.index), total)

        # (requirements added one addRequirement call each, expected length)
        cases = [
            # no adapters/barcodes
            ([[('=', 0)]], 15),
            # no adapters/barcodes
            ([[('=', 'NO_LOCAL_CONTEXT')]], 15),
            # some adapters/barcodes
            ([[('!=', 0)]], 102),
            # adapter before
            ([[('&', 1)]], 70),
            # adapter before
            ([[('&', 'ADAPTER_BEFORE')]], 70),
            # adapter after
            ([[('&', 2)]], 69),
            # adapter before or after
            ([[('&', 3)]], 102),
            # adapter before or after
            ([[('&', 'ADAPTER_BEFORE | ADAPTER_AFTER')]], 102),
            # adapter before or after but not both
            ([[('!=', 0)], [('~', 1), ('~', 2)]], 65),
            # adapter before or after
            ([[('&', 1), ('&', 2)]], 102),
            # adapter before and after
            ([[('&', 1)], [('&', 2)]], 37),
            # adapter before but not after
            ([[('&', 1)], [('~', 2)]], 33),
            # no adapter before
            ([[('~', 1)]], 47),
            # no adapter before or after
            ([[('~', 1)], [('~', 2)]], 15),
            # no adapter before or after
            ([[('~', 3)]], 15),
        ]
        for requirements, expected in cases:
            for requirement in requirements:
                ss.filters.addRequirement(cx=requirement)
            self.assertEqual(len(ss.index), expected)
            ss.filters.removeRequirement('cx')
            self.assertEqual(len(ss.index), total)
Example #18
0
 def __init__(self):
     """Open the stitcher and bas.h5 readers and cache item 1650 of each."""
     stitcher = ZmwReadStitcher(getUnalignedBam())
     bas_reader = BasH5Reader(getBaxForBam())
     self.V = stitcher
     self.B = bas_reader
     # The same key is looked up in both readers so tests can compare them.
     self.VZ = stitcher[1650]
     self.BZ = bas_reader[1650]
    def __init__(self):
        """Open the BAM and bax.h5 readers and cache the first read of each."""
        bam_reader = BamReader(data.getUnalignedBam())
        bax_reader = BaxH5Reader(data.getBaxForBam())
        self.bam = bam_reader
        self.bax = bax_reader

        # First subread from the bax reader, first record from the BAM reader.
        self.baxRead0 = next(bax_reader.subreads())
        self.bamRead0 = next(iter(bam_reader))
    def setup_class(self):
        """Open the BAM and bax.h5 readers and cache the first read of each."""
        bam_reader = BamReader(data.getUnalignedBam())
        bax_reader = BaxH5Reader(data.getBaxForBam())
        self.bam = bam_reader
        self.bax = bax_reader

        # First subread from the bax reader, first record from the BAM reader.
        self.baxRead0 = next(bax_reader.subreads())
        self.bamRead0 = next(iter(bam_reader))