Python DataSet.split Exemples, pbcore.io.DataSet.split Python Exemples

Exemple #1

0

Afficher le fichier

 def test_split(self):
     ds1 = DataSet(data.getXml())
     self.assertTrue(ds1.numExternalResources > 1)
     dss = ds1.split()
     self.assertTrue(len(dss) == ds1.numExternalResources)
     dss = ds1.split(chunks=1)
     self.assertTrue(len(dss) == 1)
     dss = ds1.split(chunks=2, ignoreSubDatasets=True)
     self.assertTrue(len(dss) == 2)
     self.assertFalse(dss[0].uuid == dss[1].uuid)
     self.assertTrue(dss[0].name == dss[1].name)
     # Lets try merging and splitting on subdatasets
     ds1 = DataSet(data.getXml(8))
     self.assertEquals(ds1.totalLength, 123588)
     ds1tl = ds1.totalLength
     ds2 = DataSet(data.getXml(11))
     self.assertEquals(ds2.totalLength, 117086)
     ds2tl = ds2.totalLength
     dss = ds1 + ds2
     self.assertTrue(dss.totalLength == (ds1tl + ds2tl))
     ds1, ds2 = sorted(dss.split(2),
                       key=lambda x: x.totalLength,
                       reverse=True)
     self.assertTrue(ds1.totalLength == ds1tl)
     self.assertTrue(ds2.totalLength == ds2tl)

Exemple #2

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

    def test_split_by_contigs_with_split(self):
        # test to make sure the refWindows work when chunks == # refs
        ds3 = DataSet(data.getBam())
        dss = ds3.split(contigs=True)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        # not all references have something mapped to them, refWindows doesn't
        # care...
        self.assertNotEqual(refWindows, sorted(ds3.refWindows))
        random_few = [('C.beijerinckii.13', 0, 1433),
                      ('B.vulgatus.4', 0, 1449),
                      ('E.faecalis.1', 0, 1482)]
        for reference in random_few:
            found = False
            for ref in refWindows:
                if ref == reference:
                    found = True
            self.assertTrue(found)
        old_refWindows = refWindows

        dss = ds3.split(contigs=True, chunks=1)
        self.assertEqual(len(dss), 1)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        self.assertEqual(refWindows, old_refWindows)

        dss = ds3.split(contigs=True, chunks=24)
        self.assertEqual(len(dss), 24)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))

        random_few = [('E.faecalis.2', 0, 741),
                      ('E.faecalis.2', 741, 1482)]
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            if not found:
                log.debug(ref)
            self.assertTrue(found)

        dss = ds3.split(contigs=True, chunks=36)
        self.assertEqual(len(dss), 36)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        random_few = [('E.faecalis.2', 0, 494),
                      ('E.faecalis.2', 494, 988),
                      ('E.faecalis.2', 988, 1482)]
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)

Exemple #3

0

Afficher le fichier

    def test_split_by_contigs_with_split(self):
        # test to make sure the refWindows work when chunks == # refs
        ds3 = DataSet(data.getBam())
        dss = ds3.split(contigs=True)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        # not all references have something mapped to them, refWindows doesn't
        # care...
        self.assertNotEqual(refWindows, sorted(ds3.refWindows))
        random_few = [('C.beijerinckii.13', 0, 1433),
                      ('B.vulgatus.4', 0, 1449), ('E.faecalis.1', 0, 1482)]
        for reference in random_few:
            found = False
            for ref in refWindows:
                if ref == reference:
                    found = True
            self.assertTrue(found)
        old_refWindows = refWindows

        dss = ds3.split(contigs=True, chunks=1)
        self.assertEqual(len(dss), 1)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        self.assertEqual(refWindows, old_refWindows)

        dss = ds3.split(contigs=True, chunks=24)
        self.assertEqual(len(dss), 24)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))

        random_few = [('E.faecalis.2', 0, 741), ('E.faecalis.2', 741, 1482)]
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            if not found:
                log.debug(ref)
            self.assertTrue(found)

        dss = ds3.split(contigs=True, chunks=36)
        self.assertEqual(len(dss), 36)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        random_few = [('E.faecalis.2', 0, 494), ('E.faecalis.2', 494, 988),
                      ('E.faecalis.2', 988, 1482)]
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)

Exemple #4

0

Afficher le fichier

    def test_refLengths(self):
        ds = DataSet(data.getBam(0))
        random_few = {
            'B.cereus.6': 1472,
            'S.agalactiae.1': 1470,
            'B.cereus.4': 1472
        }
        for key, value in random_few.items():
            self.assertEqual(ds.refLengths[key], value)

        # this is a hack to only emit refNames that actually have records
        # associated with them:
        dss = ds.split(contigs=True, chunks=1)[0]
        self.assertEqual(
            dss.refLengths, {
                'B.vulgatus.4': 1449,
                'B.vulgatus.5': 1449,
                'C.beijerinckii.13': 1433,
                'C.beijerinckii.14': 1433,
                'C.beijerinckii.9': 1433,
                'E.coli.6': 1463,
                'E.faecalis.1': 1482,
                'E.faecalis.2': 1482,
                'R.sphaeroides.1': 1386,
                'S.epidermidis.2': 1472,
                'S.epidermidis.3': 1472,
                'S.epidermidis.4': 1472
            })

Exemple #5

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

 def test_split(self):
     ds1 = DataSet(data.getXml())
     self.assertTrue(ds1.numExternalResources > 1)
     dss = ds1.split()
     self.assertTrue(len(dss) == ds1.numExternalResources)
     dss = ds1.split(chunks=1)
     self.assertTrue(len(dss) == 1)
     dss = ds1.split(chunks=2, ignoreSubDatasets=True)
     self.assertTrue(len(dss) == 2)
     self.assertFalse(dss[0].uuid == dss[1].uuid)
     self.assertTrue(dss[0].name == dss[1].name)
     # Lets try merging and splitting on subdatasets
     ds1 = DataSet(data.getXml(8))
     self.assertEquals(ds1.totalLength, 123588)
     ds1tl = ds1.totalLength
     ds2 = DataSet(data.getXml(11))
     self.assertEquals(ds2.totalLength, 117086)
     ds2tl = ds2.totalLength
     dss = ds1 + ds2
     self.assertTrue(dss.totalLength == (ds1tl + ds2tl))
     ds1, ds2 = sorted(dss.split(2), key=lambda x: x.totalLength,
                       reverse=True)
     self.assertTrue(ds1.totalLength == ds1tl)
     self.assertTrue(ds2.totalLength == ds2tl)

Exemple #6

0

Afficher le fichier

 def test_reads_in_subdataset(self):
     ds = DataSet(data.getXml(8))
     #refs = ['E.faecalis.1', 'E.faecalis.2']
     #readRefs = ['E.faecalis.1'] * 2 + ['E.faecalis.2'] * 9
     #ds.filters.removeRequirement('rname')
     dss = ds.split(contigs=True)
     self.assertEqual(len(dss), 12)
     self.assertEqual([
         'B.vulgatus.4', 'B.vulgatus.5', 'C.beijerinckii.13',
         'C.beijerinckii.14', 'C.beijerinckii.9', 'E.coli.6',
         'E.faecalis.1', 'E.faecalis.2', 'R.sphaeroides.1',
         'S.epidermidis.2', 'S.epidermidis.3', 'S.epidermidis.4'
     ], sorted([ds.filters[0][0].value for ds in dss]))
     self.assertEqual(len(list(dss[0].readsInSubDatasets())), 3)
     self.assertEqual(len(list(dss[1].readsInSubDatasets())), 20)

Exemple #7

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

 def test_reads_in_subdataset(self):
     ds = DataSet(data.getXml(8))
     #refs = ['E.faecalis.1', 'E.faecalis.2']
     #readRefs = ['E.faecalis.1'] * 2 + ['E.faecalis.2'] * 9
     #ds.filters.removeRequirement('rname')
     dss = ds.split(contigs=True)
     self.assertEqual(len(dss), 12)
     self.assertEqual(['B.vulgatus.4', 'B.vulgatus.5',
                       'C.beijerinckii.13', 'C.beijerinckii.14',
                       'C.beijerinckii.9', 'E.coli.6', 'E.faecalis.1',
                       'E.faecalis.2', 'R.sphaeroides.1',
                       'S.epidermidis.2', 'S.epidermidis.3',
                       'S.epidermidis.4'],
                      sorted([ds.filters[0][0].value for ds in dss]))
     self.assertEqual(len(list(dss[0].readsInSubDatasets())), 3)
     self.assertEqual(len(list(dss[1].readsInSubDatasets())), 20)

Exemple #8

0

Afficher le fichier

def splitXml(args):
    log.debug("Starting split")
    dataSet = DataSet(args.infile, strict=args.strict)
    chunks = len(args.outfiles)
    if args.chunks:
        chunks = args.chunks
    dss = dataSet.split(chunks=chunks,
                        ignoreSubDatasets=(not args.subdatasets),
                        contigs=args.contigs,
                        maxChunks=args.maxChunks,
                        breakContigs=args.breakContigs)
    log.debug("Split into {i} chunks".format(i=len(dss)))
    infix = 'chunk{i}'
    if args.contigs:
        infix += 'contigs'
    if not args.outfiles:
        if not args.outdir:
            args.outfiles = [
                '.'.join(
                    args.infile.split('.')[:-1] +
                    [infix.format(i=chNum), 'xml'])
                for chNum in range(len(dss))
            ]
        else:
            args.outfiles = [
                '.'.join(
                    args.infile.split('.')[:-1] +
                    [infix.format(i=chNum), 'xml'])
                for chNum in range(len(dss))
            ]
            args.outfiles = [
                os.path.join(args.outdir, os.path.basename(outfn))
                for outfn in args.outfiles
            ]
            num = len(dss)
            end = ''
            if num > 5:
                num = 5
                end = '...'
            log.debug("Emitting {f} {e}".format(f=', '.join(
                args.outfiles[:num]),
                                                e=end))
    log.debug("Finished splitting, now writing")
    for out_fn, dset in zip(args.outfiles, dss):
        dset.write(out_fn)
    log.debug("Done writing files")

Exemple #9

0

Afficher le fichier

 def test_refWindows(self):
     ds = DataSet(data.getBam())
     dss = ds.split(chunks=2, contigs=True)
     self.assertEqual(len(dss), 2)
     log.debug(dss[0].filters)
     log.debug(dss[1].filters)
     self.assertTrue('( rname = E.faecalis.2 ) ' in str(dss[0].filters)
                     or '( rname = E.faecalis.2 ) ' in str(dss[1].filters))
     ds = DataSet(data.getBam())
     ds.filters.addRequirement(rname=[('=', 'lambda_NEB3011'),
                                      ('=', 'lambda_NEB3011')],
                               tStart=[('<', '0'), ('<', '100')],
                               tEnd=[('>', '99'), ('>', '299')])
     self.assertEqual(
         str(ds.filters), '( rname = lambda_NEB3011 AND tstart '
         '< 0 AND tend > 99 ) OR ( rname = lambd'
         'a_NEB3011 AND tstart < 100 AND tend > 299 )')

Exemple #10

0

Afficher le fichier

Fichier : EntryPoints.py Projet : jrharting/pbcore

def splitXml(args):
    log.debug("Starting split")
    dataSet = DataSet(args.infile, strict=args.strict)
    chunks = len(args.outfiles)
    if args.chunks:
        chunks = args.chunks
    dss = dataSet.split(chunks=chunks,
                        ignoreSubDatasets=(not args.subdatasets),
                        contigs=args.contigs,
                        maxChunks=args.maxChunks,
                        breakContigs=args.breakContigs)
    log.debug("Split into {i} chunks".format(i=len(dss)))
    infix = 'chunk{i}'
    if args.contigs:
        infix += 'contigs'
    if not args.outfiles:
        if not args.outdir:
            args.outfiles = ['.'.join(args.infile.split('.')[:-1] +
                                      [infix.format(i=chNum), 'xml'])
                             for chNum in range(len(dss))]
        else:
            args.outfiles = ['.'.join(args.infile.split('.')[:-1] +
                                      [infix.format(i=chNum), 'xml'])
                             for chNum in range(len(dss))]
            args.outfiles = [os.path.join(args.outdir,
                                          os.path.basename(outfn))
                             for outfn in args.outfiles]
            num = len(dss)
            end = ''
            if num > 5:
                num = 5
                end = '...'
            log.debug("Emitting {f} {e}".format(
                f=', '.join(args.outfiles[:num]),
                e=end))
    log.debug("Finished splitting, now writing")
    for out_fn, dset in zip(args.outfiles, dss):
        dset.write(out_fn)
    log.debug("Done writing files")

Exemple #11

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

    def test_refLengths(self):
        ds = DataSet(data.getBam(0))
        random_few = {'B.cereus.6': 1472, 'S.agalactiae.1': 1470,
                      'B.cereus.4': 1472}
        for key, value in random_few.items():
            self.assertEqual(ds.refLengths[key], value)

        # this is a hack to only emit refNames that actually have records
        # associated with them:
        dss = ds.split(contigs=True, chunks=1)[0]
        self.assertEqual(dss.refLengths, {'B.vulgatus.4': 1449,
                                          'B.vulgatus.5': 1449,
                                          'C.beijerinckii.13': 1433,
                                          'C.beijerinckii.14': 1433,
                                          'C.beijerinckii.9': 1433,
                                          'E.coli.6': 1463,
                                          'E.faecalis.1': 1482,
                                          'E.faecalis.2': 1482,
                                          'R.sphaeroides.1': 1386,
                                          'S.epidermidis.2': 1472,
                                          'S.epidermidis.3': 1472,
                                          'S.epidermidis.4': 1472
                                         })

Exemple #12

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

 def test_refWindows(self):
     ds = DataSet(data.getBam())
     dss = ds.split(chunks=2, contigs=True)
     self.assertEqual(len(dss), 2)
     log.debug(dss[0].filters)
     log.debug(dss[1].filters)
     self.assertTrue(
         '( rname = E.faecalis.2 ) '
         in str(dss[0].filters)
         or
         '( rname = E.faecalis.2 ) '
         in str(dss[1].filters))
     ds = DataSet(data.getBam())
     ds.filters.addRequirement(rname=[('=', 'lambda_NEB3011'),
                                      ('=', 'lambda_NEB3011')],
                               tStart=[('<', '0'),
                                       ('<', '100')],
                               tEnd=[('>', '99'),
                                     ('>', '299')])
     self.assertEqual(str(ds.filters),
                      '( rname = lambda_NEB3011 AND tstart '
                      '< 0 AND tend > 99 ) OR ( rname = lambd'
                      'a_NEB3011 AND tstart < 100 AND tend > 299 )')

Exemple #13

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

    def test_reads_in_contig(self):
        log.info("Testing reads in contigs")
        ds = DataSet(data.getXml(8))
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 12)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 3)

        ds = DataSet(data.getXml(8))
        filt = Filters()
        filt.addRequirement(length=[('>', '100')])
        ds.addFilters(filt)
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 12)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 3)

        ds = DataSet(data.getXml(8))
        filt = Filters()
        filt.addRequirement(length=[('>', '1000')])
        ds.addFilters(filt)
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 9)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 1)

Exemple #14

0

Afficher le fichier

Fichier : test_pbdataset.py Projet : jrharting/pbcore

    def test_split_by_contigs_with_split_and_maxChunks(self):
        # test to make sure the refWindows work when chunks == # refs
        ds3 = DataSet(data.getBam())
        dss = ds3.split(contigs=True)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        # not all references have something mapped to them, refWindows doesn't
        # care...
        self.assertNotEqual(refWindows, sorted(ds3.refWindows))
        random_few = [('C.beijerinckii.13', 0, 1433),
                      ('B.vulgatus.4', 0, 1449),
                      ('E.faecalis.1', 0, 1482)]
        for reference in random_few:
            found = False
            for ref in refWindows:
                if ref == reference:
                    found = True
            self.assertTrue(found)
        old_refWindows = refWindows

        dss = ds3.split(contigs=True, maxChunks=1)
        self.assertEqual(len(dss), 1)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        self.assertEqual(refWindows, old_refWindows)

        dss = ds3.split(contigs=True, maxChunks=24)
        # This isn't expected if num refs >= 100, as map check isn't made
        # for now (too expensive)
        # There are only 12 refs represented in this set, however...
        self.assertEqual(len(dss), 12)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))

        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            if not found:
                log.debug(ref)
            self.assertTrue(found)

        dss = ds3.split(contigs=True, maxChunks=36)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)

        dss = ds3.split(contigs=True, maxChunks=36, breakContigs=True)
        self.assertEqual(len(dss), 2)
        refWindows = sorted(reduce(lambda x, y: x + y,
                                   [ds.refWindows for ds in dss]))
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)

Exemple #15

0

Afficher le fichier

    def test_reads_in_contig(self):
        log.info("Testing reads in contigs")
        ds = DataSet(data.getXml(8))
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 12)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 3)

        ds = DataSet(data.getXml(8))
        filt = Filters()
        filt.addRequirement(length=[('>', '100')])
        ds.addFilters(filt)
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 12)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 3)

        ds = DataSet(data.getXml(8))
        filt = Filters()
        filt.addRequirement(length=[('>', '1000')])
        ds.addFilters(filt)
        dss = ds.split(contigs=True)
        self.assertEqual(len(dss), 9)
        efaec1TimesFound = 0
        efaec1TotFound = 0
        efaec2TimesFound = 0
        efaec2TotFound = 0
        for ds in dss:
            ef1 = len(list(ds.readsInReference('E.faecalis.1')))
            ef2 = len(list(ds.readsInReference('E.faecalis.2')))
            if ef1:
                efaec1TimesFound += 1
                efaec1TotFound += ef1
            if ef2:
                efaec2TimesFound += 1
                efaec2TotFound += ef2
        self.assertEqual(efaec1TimesFound, 1)
        self.assertEqual(efaec1TotFound, 20)
        self.assertEqual(efaec2TimesFound, 1)
        self.assertEqual(efaec2TotFound, 1)

Exemple #16

0

Afficher le fichier

    def test_split_by_contigs_with_split_and_maxChunks(self):
        # test to make sure the refWindows work when chunks == # refs
        ds3 = DataSet(data.getBam())
        dss = ds3.split(contigs=True)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        # not all references have something mapped to them, refWindows doesn't
        # care...
        self.assertNotEqual(refWindows, sorted(ds3.refWindows))
        random_few = [('C.beijerinckii.13', 0, 1433),
                      ('B.vulgatus.4', 0, 1449), ('E.faecalis.1', 0, 1482)]
        for reference in random_few:
            found = False
            for ref in refWindows:
                if ref == reference:
                    found = True
            self.assertTrue(found)
        old_refWindows = refWindows

        dss = ds3.split(contigs=True, maxChunks=1)
        self.assertEqual(len(dss), 1)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        self.assertEqual(refWindows, old_refWindows)

        dss = ds3.split(contigs=True, maxChunks=24)
        # This isn't expected if num refs >= 100, as map check isn't made
        # for now (too expensive)
        # There are only 12 refs represented in this set, however...
        self.assertEqual(len(dss), 12)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))

        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            if not found:
                log.debug(ref)
            self.assertTrue(found)

        dss = ds3.split(contigs=True, maxChunks=36)
        self.assertEqual(len(dss), 12)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)

        dss = ds3.split(contigs=True, maxChunks=36, breakContigs=True)
        self.assertEqual(len(dss), 2)
        refWindows = sorted(
            reduce(lambda x, y: x + y, [ds.refWindows for ds in dss]))
        for ref in random_few:
            found = False
            for window in refWindows:
                if ref == window:
                    found = True
            self.assertTrue(found)