Python get_file Examples, pbtestdata.get_file Python Examples

Example #1

0

Show file

    def test_dataset_split_multi_movie(self):
        """Split a merged SubreadSet with the ``dataset split`` command.

        Two test SubreadSets are merged and written to a temporary XML file.
        That file is then split with several option sets, and the length of
        every output dataset is compared with the expected sizes.
        """
        ds1 = pbtestdata.get_file("subreads-sequel")
        ds2 = pbtestdata.get_file("subreads-xml")
        tmp_ds = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        with SubreadSet(ds1, ds2) as ds:
            ds.write(tmp_ds)
        base_args = [
            "dataset", "split", "--maxChunks", "4", "--targetSize", "1",
            "--prefix", "tst_multi_ds"
        ]

        def run_and_validate(args, ds_sizes):
            """Run the split with extra ``args`` and check the output sizes."""
            # A fresh output directory per run keeps the listing below
            # limited to the files written by this invocation.
            outdir = tempfile.mkdtemp(suffix="dataset-unittest")
            final_args = base_args + args + ["--outdir", outdir, tmp_ds]
            self._check_cmd(" ".join(final_args))
            dss = [
                openDataSet(op.join(outdir, fn))
                for fn in sorted(os.listdir(outdir))
            ]
            assert [len(chunk) for chunk in dss] == ds_sizes

        run_and_validate(["--zmws"], [52, 22, 42, 21])
        #run_and_validate(["--auto"], [8, 12, 54, 63])
        run_and_validate(["--zmws", "--keepReadGroups"], [8, 12, 54, 63])

Example #2

0

Show file

 def test_dataset_create_set_sample_names(self):
     """Check that ``dataset create`` applies the requested sample names.

     The command is run on the "subreads-bam" test file and then on the
     "barcoded-subreadset" test file (which already has samples); the well
     sample and biosample names of each output are inspected.
     """
     sample_args = [
         "--well-sample-name", "WELLSAMPLE", "--bio-sample-name", "BIOSAMPLE"
     ]

     def _create_from(input_name):
         # Build the command line, run it, and hand back the output path.
         out_xml = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
         argv = [
             "dataset", "create", "--force", out_xml,
             pbtestdata.get_file(input_name)
         ]
         self._run_cmd_with_output(" ".join(argv + sample_args), out_xml)
         return out_xml

     with SubreadSet(_create_from("subreads-bam")) as ds:
         collections = ds.metadata.collections
         assert len(collections) == 1
         well_sample = collections[0].wellSample
         assert well_sample.name == "WELLSAMPLE"
         assert well_sample.bioSamples[0].name == "BIOSAMPLE"
         assert len(well_sample.bioSamples) == 1

     # now with existing samples
     with SubreadSet(_create_from("barcoded-subreadset")) as ds:
         collections = ds.metadata.collections
         assert len(collections) == 1
         well_sample = collections[0].wellSample
         assert well_sample.name == "WELLSAMPLE"
         assert {s.name for s in well_sample.bioSamples} == {"BIOSAMPLE"}

Example #3

0

Show file

 def test_get_dataset_size(self):
     """Check the metrics returned by ``get_dataset_size`` for several inputs."""
     sequel_xml = pbtestdata.get_file("subreads-sequel")

     size = get_dataset_size(sequel_xml, True, True)
     assert size.numRecords == 20
     assert size.totalLengthMb == 1
     assert size.indexSizeGb == 2
     assert size.numResources == 1
     assert size.numFilters == 0

     # same file, both boolean options switched off
     size = get_dataset_size(sequel_xml, False, False)
     assert size.numRecords == 20
     assert size.totalLengthMb == 1
     assert size.indexSizeGb == 1

     size = get_dataset_size(self.BIG_DATA, True, True)
     assert size.numRecords == 805580876
     assert size.totalLengthMb == 271330
     assert size.indexSizeGb == 45
     assert size.numResources == 1
     assert size.numFilters == 0

     size = get_dataset_size(self.TINY_REF, False, False)
     assert size.numRecords == 1
     assert size.totalLengthMb == 1

     size = get_dataset_size(self.BIG_REF, False, False)
     assert size.numRecords == 86
     assert size.totalLengthMb == 2993

     aligned_xml = pbtestdata.get_file("aligned-ds-2")
     size = get_dataset_size(aligned_xml, True, True)
     assert size.numRecords == 21
     assert size.numResources == 2

Example #4

0

Show file

File: test_pbreports_summarize_coverage.py Project: PacificBiosciences/pbreports

 def setUpClass(cls):
     """Open the aligned test dataset and build its interval lists once."""
     cls.xml_path = pbtestdata.get_file("aligned-xml")
     lambda_fasta = pbtestdata.get_file("lambda-fasta")
     cls.ds_reader = AlignmentSet(
         cls.xml_path, strict=True, reference=lambda_fasta)
     readers = cls.ds_reader.resourceReaders()
     cls.bam_readers = readers
     cls.interval_lists = summarize_coverage.build_interval_lists(readers)

Example #5

0

Show file

File: test_pbdataset_split.py Project: MShaffar19/pbcore

 def test_split_zmws_around_read_groups(self):
     """Check ZMW splitting with and without ``breakReadGroups``."""
     xml_a = pbtestdata.get_file("subreads-xml")
     xml_b = pbtestdata.get_file("subreads-sequel")
     qid_first = -2081539485
     qid_second = -1197849594

     merged = SubreadSet(xml_a, xml_b)
     assert len(merged) == 137

     # this is still the default behavior
     pieces = list(merged.split(chunks=2, zmws=True, breakReadGroups=True))
     assert len(pieces[0]) == 72
     assert len(pieces[1]) == 65

     # don't break up movies
     pieces = list(merged.split(chunks=2, zmws=True, breakReadGroups=False))
     assert len(pieces[0]) == 20
     assert len(pieces[1]) == 117
     for piece, qid in zip(pieces, (qid_first, qid_second)):
         assert np.all(piece.index.qId == qid)

     pieces = list(
         merged.split(
             chunks=4, targetSize=1, zmws=True, breakReadGroups=False))
     assert [len(piece) for piece in pieces] == [8, 12, 54, 63]
     expected_qids = (qid_first, qid_first, qid_second, qid_second)
     for piece, qid in zip(pieces, expected_qids):
         assert np.all(piece.index.qId == qid)

     # control: single-movie dataset
     single = SubreadSet(xml_a)
     kept = list(single.split(chunks=4, zmws=True, breakReadGroups=False))
     broken = list(single.split(chunks=4, zmws=True, breakReadGroups=True))
     assert [len(x) for x in kept] == [len(y) for y in broken]

Example #6

0

Show file

File: test_pbreports_report_coverage.py Project: MShaffar19/pbreports

class TestToolContractHgap(pbcommand.testkit.core.PbTestApp):
    """Test-app settings for the ``pbreports.report.coverage_hgap`` module."""
    DRIVER_BASE = "python -m pbreports.report.coverage_hgap"
    # input paths are resolved through pbtestdata, in this order
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("lambda-fasta", "alignment-summary-gff"),
    ))
    IS_DISTRIBUTED = RESOLVED_IS_DISTRIBUTED = True

Example #7

0

Show file

File: test_pbcore_io_align_pacbiobamindex.py Project: MShaffar19/pbcore

 def test_get_index_size_bytes(self):
     """Check ``get_index_size_bytes`` on the first resource of three datasets."""
     import pbtestdata
     expected_sizes = (
         ("subreads-sequel", 580),
         ("ccs-barcoded", 68),
         ("aligned-xml", 7504),
     )
     for file_id, n_bytes in expected_sizes:
         dataset = openDataSet(pbtestdata.get_file(file_id))
         pbi = dataset.externalResources[0].pbi
         assert get_index_size_bytes(pbi) == n_bytes

Example #8

0

Show file

File: test_tasks_extract_unmapped_bam.py Project: MShaffar19/pbcoretools

 def test_run_bamsieve_extract_unmapped(self):
     """Run ``bamsieve`` with a blacklist and check the output shares no reads."""
     output_bam = "unmapped.subreads.bam"
     mapped = _make_filtered(pbtestdata.get_file("aligned-xml"))
     subreads = pbtestdata.get_file("subreads-xml")
     self._check_call([
         "bamsieve", "--subreads", "--blacklist", mapped, subreads,
         output_bam,
     ])
     assert_no_reads_in_common(self, mapped, output_bam)

Example #9

0

Show file

File: test_pbreports_summarize_coverage.py Project: MShaffar19/pbreports

 def setUpClass(cls):
     """Prepare the shared AlignmentSet reader, BAM readers and interval lists."""
     cls.xml_path = pbtestdata.get_file("aligned-xml")
     reader_kwargs = dict(
         strict=True,
         reference=pbtestdata.get_file("lambda-fasta"),
     )
     cls.ds_reader = AlignmentSet(cls.xml_path, **reader_kwargs)
     cls.bam_readers = cls.ds_reader.resourceReaders()
     build = summarize_coverage.build_interval_lists
     cls.interval_lists = build(cls.bam_readers)

Example #10

0

Show file

File: test_tasks_scatter_gather.py Project: pezmaster31/pbcoretools

class TestScatterCCSReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test settings for ``pbcoretools.tasks.scatter_ccs_reference``."""
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_ccs_reference"
    # input paths are resolved through pbtestdata, in this order
    INPUT_FILES = list(map(pbtestdata.get_file, ("rsii-ccs", "lambdaNEB")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 8
    CHUNK_KEYS = (
        "$chunk.ccsset_id",
        "$chunk.reference_id",
    )

Example #11

0

Show file

File: test_tasks_scatter_gather.py Project: pezmaster31/pbcoretools

class TestScatterSubreadReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test settings for ``pbcoretools.tasks.scatter_subread_reference``."""
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subread_reference"
    # input paths are resolved through pbtestdata, in this order
    INPUT_FILES = list(map(pbtestdata.get_file, ("subreads-xml", "lambdaNEB")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 3
    CHUNK_KEYS = (
        "$chunk.subreadset_id",
        "$chunk.reference_id",
    )

Example #12

0

Show file

File: test_pbreports_summarize_coverage.py Project: MShaffar19/pbreports

class TestSummarizeCoverage(pbcommand.testkit.PbTestApp):
    """Test-app settings for the ``summarize_coverage`` report module."""
    DRIVER_BASE = "python -m pbreports.report.summarize_coverage.summarize_coverage "
    # the emit/resolve drivers are the base command plus one extra flag
    DRIVER_EMIT = "".join([DRIVER_BASE, " --emit-tool-contract "])
    DRIVER_RESOLVE = "".join([DRIVER_BASE, " --resolved-tool-contract "])
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("aligned-xml", "lambda-fasta"),
    ))
    TASK_OPTIONS = dict()

Example #13

0

Show file

class TestSummarizeConsensus(pbcommand.testkit.PbTestApp):
    """Test-app settings for the ``summarizeConsensus`` command."""
    DRIVER_BASE = "summarizeConsensus"
    # the emit/resolve drivers are the base command plus one extra flag
    DRIVER_EMIT = "".join([DRIVER_BASE, " --emit-tool-contract "])
    DRIVER_RESOLVE = "".join([DRIVER_BASE, " --resolved-tool-contract "])
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("alignment-summary-gff", "variants-gff"),
    ))
    TASK_OPTIONS = dict()

Example #14

0

Show file

 def test_trust_counts(self):
     """Open two aligned datasets with ``trustCounts=True`` and check the totals.

     Also asserts that no index has been set and no readers are open afterwards.
     """
     import pbtestdata
     first = pbtestdata.get_file("aligned-xml")
     second = pbtestdata.get_file("aligned-ds-2")
     merged = openDataFile(first, second, trustCounts=True)
     expected_records = 133
     assert merged.numRecords == expected_records
     assert len(merged) == expected_records
     assert merged.totalLength == 274217
     assert merged._index is None
     assert len(merged._openReaders) == 0

Example #15

0

Show file

File: test_tasks_scatter_gather.py Project: pezmaster31/pbcoretools

class TestScatterSubreadsBarcoding(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test settings for ``pbcoretools.tasks.scatter_subreads_bam2bam``."""
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subreads_bam2bam"
    # XXX not actually barcoded data, but it doesn't matter here
    INPUT_FILES = list(map(pbtestdata.get_file, ("subreads-bam", "barcodeset")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 8
    NCHUNKS_EXPECTED = 2
    CHUNK_KEYS = (
        "$chunk.subreadset_id",
        "$chunk.barcodeset_id",
    )

Example #16

0

Show file

File: test_utils.py Project: lqsae/pbcommand

    def test_get_dataset_metadata(self):
        """Check ``get_dataset_metadata`` against a SubreadSet opened with pbcore.

        The metatype and UUID are checked for the "subreads-xml" test file,
        and passing ``None`` must raise.
        """
        import pbtestdata
        md = get_dataset_metadata(pbtestdata.get_file("subreads-xml"))
        assert md.metatype == "PacBio.DataSet.SubreadSet"

        from pbcore.io import SubreadSet
        ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
        assert md.uuid == ds.uuid

        # The raised exception is never inspected, so it is not bound.
        with pytest.raises(Exception):
            get_dataset_metadata(None)

Example #17

0

Show file

class TestPbalignMinorVariants(pbcommand.testkit.PbTestApp):
    """Test-app settings and output check for ``pbalign.tasks.align_minorvariants``."""
    DRIVER_BASE = "python -m pbalign.tasks.align_minorvariants"
    INPUT_FILES = list(map(pbtestdata.get_file, ("rsii-ccs", "lambdaNEB")))

    def run_after(self, rtc, output_dir):
        """Assert that the first output file opens as a ConsensusAlignmentSet."""
        first_output = rtc.task.output_files[0]
        ds_out = openDataSet(first_output)
        # the actual type name is used as the failure message
        actual_type = type(ds_out).__name__
        is_expected = isinstance(ds_out, ConsensusAlignmentSet)
        self.assertTrue(is_expected, actual_type)

Example #18

0

Show file

 def test_gather_alignments_trust_counts(self):
     """Run ``dataset create --trustCounts`` on two inputs and check the totals."""
     inputs = [
         pbtestdata.get_file("aligned-xml"),
         pbtestdata.get_file("aligned-ds-2"),
     ]
     merged_xml = tempfile.NamedTemporaryFile(
         suffix=".alignmentset.xml").name
     argv = ["dataset", "create", "--trustCounts", merged_xml] + inputs
     return_code = subprocess.check_call(argv)
     assert return_code == 0
     merged = AlignmentSet(merged_xml, trustCounts=True)
     assert merged.numRecords == 133
     assert merged.totalLength == 274217

Example #19

0

Show file

File: test_pbreports_report_mapping_stats.py Project: MShaffar19/pbreports

class TestPbreportMappingStatsHGAP(pbcommand.testkit.PbTestApp):
    """Test-app settings and report check for ``mapping_stats_hgap``."""
    DRIVER_BASE = "python -m pbreports.report.mapping_stats_hgap"
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("aligned-internal-subreads", "internal-subreads"),
    ))

    def run_after(self, rtc, output_dir):
        """Load the output report and check the id and value of its first attribute."""
        report = load_report_from_json(rtc.task.output_files[0])
        first_attribute = report.attributes[0]
        self.assertEqual(first_attribute.id, Constants.A_PCT_MAPPED)
        self.assertAlmostEqual(first_attribute.value, 0.9137, delta=0.0001)

Example #20

0

Show file

File: test_pbreports_report_top_variants.py Project: MShaffar19/pbreports

class TestPbreportTopVariants(pbcommand.testkit.PbTestApp):
    """Test-app settings for the ``pbreports.report.top_variants`` module."""
    # imported in the class body, so Constants is also a class attribute
    from pbreports.report.top_variants import Constants
    DRIVER_BASE = "python -m pbreports.report.top_variants "
    DRIVER_EMIT = "".join([DRIVER_BASE, " --emit-tool-contract "])
    DRIVER_RESOLVE = "".join([DRIVER_BASE, " --resolved-tool-contract "])
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("variants-gff", "lambda-fasta"),
    ))
    # each task option is set to its default constant
    TASK_OPTIONS = dict([
        (Constants.HOW_MANY_ID, Constants.HOW_MANY_DEFAULT),
        (Constants.BATCH_SORT_SIZE_ID, Constants.BATCH_SORT_SIZE_DEFAULT),
    ])

Example #21

0

Show file

class TestEstimateLimaMemory(PbIntegrationBase):
    """Tests for ``estimate_lima_memory``, called directly and as a module.

    The TINY_* inputs come from pbtestdata; the BIG_* and CCS inputs are
    absolute paths on a shared filesystem.
    """
    TINY_DATA = pbtestdata.get_file("subreads-sequel")
    TINY_BARCODES = pbtestdata.get_file("barcodeset")
    BIG_BARCODES = "/pbi/dept/secondary/siv/barcodes/Sequel_RSII_384_barcodes_v1/Sequel_RSII_384_barcodes_v1.barcodeset.xml"
    BIG_DATA = "/pbi/dept/secondary/siv/testdata/Spider/all4mers/rSPOC1_20180629_223342/1_A01/mSPOC1_180629_223410.subreadset.xml"
    CCS_DATA = "/pbi/dept/secondary/siv/testdata/SA3-Sequel/bcol/m54119_161211_175055.consensusreadset.xml"

    def test_estimate_lima_memory(self):
        """Check the estimate for the tiny, big and CCS inputs."""
        mem_gb = estimate_lima_memory(self.TINY_BARCODES, self.TINY_DATA, True)
        assert mem_gb == 2
        # this is silly of course.  but it's technically possible with the
        # Sequel II system, so we might as well just deal with it
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, self.BIG_DATA, False)
        assert mem_gb == 2752
        # this is a more realistic case - 147K barcode pairs but the BAM file
        # is small enough to fit in the default footprint
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, self.CCS_DATA, False)
        assert mem_gb == 7

    def test_integration_tiny(self):
        """Run the task module on the tiny inputs and read back its text output."""
        args = [
            "python3", "-m", "pbcoretools.tasks.memory.estimate_lima_memory",
            self.TINY_BARCODES, self.TINY_DATA, "--symmetric"
        ]
        self._check_call(args)
        with open("lima_mem_gb.txt") as txt_out:
            assert txt_out.read() == "2"

    def test_integration_big(self):
        """Run the task module on the big inputs and read back its text output."""
        args = [
            "python3", "-m", "pbcoretools.tasks.memory.estimate_lima_memory",
            self.BIG_BARCODES, self.BIG_DATA, "--asymmetric"
        ]
        self._check_call(args)
        with open("lima_mem_gb.txt") as txt_out:
            assert txt_out.read() == "2752"

    def test_defined_biosamples(self):
        """Check the estimate after biosamples are set on a copy of BIG_DATA."""
        # XXX awful dependency but it makes testing easier
        from pbcoretools.file_utils import set_bio_samples
        ds_tmp = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        # The barcode set is passed to estimate_lima_memory by path only, so
        # it is not opened here (the old handle was unused and never closed).
        with openDataSet(self.BIG_DATA, trustCounts=True) as ds:
            bcs = [("bc1001--bc1{:03d}".format(x), "Sample {}".format(x))
                   for x in range(384)]
            set_bio_samples(ds, bcs)
            ds.write(ds_tmp)
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, ds_tmp, False)
        assert mem_gb == 2

Example #22

0

Show file

File: test_utils.py Project: mpkocher/pbcommand

 def test_get_dataset_metadata(self):
     """Compare ``get_dataset_metadata`` output with pbcore's SubreadSet.

     The test is skipped when pbtestdata is missing; the UUID comparison is
     skipped when pbcore is missing.
     """
     try:
         import pbtestdata
     except ImportError:
         raise unittest.SkipTest("pbtestdata not available, skipping")

     md = get_dataset_metadata(pbtestdata.get_file("subreads-xml"))
     self.assertEqual(md.metatype, "PacBio.DataSet.SubreadSet")

     try:
         from pbcore.io import SubreadSet
     except ImportError:
         raise unittest.SkipTest("pbcore not available, skipping")

     ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
     self.assertEqual(md.uuid, ds.uuid)

Example #23

0

Show file

class TestPbalign(pbcommand.testkit.PbTestApp):
    """Test-app settings and output check for the ``pbalign`` command."""
    DRIVER_BASE = "pbalign "
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(pbtestdata.get_file, ("subreads-xml", "lambdaNEB")))
    TASK_OPTIONS = dict([
        ("pbalign.task_options.algorithm_options",
         "--holeNumbers 1-1000,30000-30500,60000-60600,100000-100500"),
    ])

    def run_after(self, rtc, output_dir):
        """Assert that the first output file opens as an AlignmentSet."""
        first_output = rtc.task.output_files[0]
        ds_out = openDataSet(first_output)
        # the actual type name is used as the failure message
        actual_type = type(ds_out).__name__
        self.assertTrue(isinstance(ds_out, AlignmentSet), actual_type)

Example #24

0

Show file

class TestToolContract(pbcommand.testkit.PbTestApp):
    """Test-app settings for the ``pbreports.report.variants`` module."""
    DATA_DIR = op.join(LOCAL_DATA, "variants")
    DRIVER_BASE = "python -m pbreports.report.variants"
    DRIVER_EMIT = "".join([DRIVER_BASE, " --emit-tool-contract "])
    DRIVER_RESOLVE = "".join([DRIVER_BASE, " --resolved-tool-contract "])
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("lambda-fasta", "consensus-summary-gff", "variants-gff"),
    ))
    TASK_OPTIONS = dict([
        ("pbreports.task_options.max_contigs", 25),
        ("pbreports.task_options.dpi", 60),
        ("pbreports.task_options.dumpdata", True),
    ])

Example #25

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: MShaffar19/pbreports

    def test_make_filter_stats_report_sts_xml(self):
        """
        Test the content of the filter report generated from an sts.xml
        """
        sts_xml = pbtestdata.get_file("stats-xml")
        rpt = make_filter_report(sts_xml, self.get_output_dir())
        d = json.loads(rpt.to_json())
        self._compare_attribute_values(report_d=d,
                                       expected_d={
                                           Constants.A_NBASES: 1672335649,
                                           Constants.A_NREADS: 394658,
                                           Constants.A_READ_N50: 7750,
                                           Constants.A_READ_LENGTH: 4237
                                       })
        # the report is expected to have written its read-length plot
        self.assertTrue(
            os.path.exists(
                os.path.join(self.get_output_dir(), 'readLenDist0.png')))
        # self.assertTrue(os.path.exists(os.path.join(
        #    self.get_output_dir(),
        #    'readQualDist0.png')))

        # these are from a raw STS file
        self.assertEqual(len(rpt._dataset_uuids), 0,
                         "Incorrect report datasets uuids")
        # print() with one argument prints the same text on Python 2 and 3;
        # the old print statement is a syntax error on Python 3.
        print(pformat(rpt.to_dict()))
        validate_report_complete(self, rpt)

Example #26

0

Show file

 def setup_class(cls):
     """Write ``cls.INPUT_FILE`` as a SubreadSet built from the test set's BAMs.

     The BAM path of every external resource in "barcoded-subreadset" is
     collected, and a strict SubreadSet over those files is written out.
     """
     source_xml = pbtestdata.get_file("barcoded-subreadset")
     with SubreadSet(source_xml) as ds_in:
         bam_files = [resource.bam for resource in ds_in.externalResources]
     with SubreadSet(*bam_files, strict=True) as ds_out:
         ds_out.write(cls.INPUT_FILE)

Example #27

0

Show file

File: test_tasks_make_trimmed_dataset.py Project: MShaffar19/pbcoretools

 def test_integration(self):
     """Run ``make_trimmed_dataset`` and check the trimmed dataset it writes.

     Two copies of the "ccs-barcoded" dataset are written with different
     names; one is registered in a datastore JSON as the "lima" file and the
     other is passed as the input dataset.
     """
     def _tmp_path(suffix):
         return tempfile.NamedTemporaryFile(suffix=suffix).name

     ccs_barcoded = pbtestdata.get_file("ccs-barcoded")
     datastore_json = _tmp_path(".datastore.json")
     lima_xml = _tmp_path(".consensusreadset.xml")
     input_xml = _tmp_path(".consensusreadset.xml")

     with ConsensusReadSet(ccs_barcoded) as ccs_tmp:
         ccs_tmp.name = "My Data (filtered)"
         ccs_tmp.tags = "ccs,filtered"
         ccs_tmp.write(input_xml)
         # same dataset object, renamed before writing the second copy
         ccs_tmp.name = "lima out"
         ccs_tmp.write(lima_xml)

     lima_file = DataStoreFile(
         uuid.uuid4(), "lima", FileTypes.DS_CCS.file_type_id, lima_xml)
     DataStore([lima_file]).write_json(datastore_json)

     self._check_call([
         "python3", "-m", "pbcoretools.tasks.make_trimmed_dataset",
         datastore_json, input_xml,
     ])

     output_xml = "trimmed.consensusreadset.xml"
     with ConsensusReadSet(output_xml, trustCounts=True) as ccs_out:
         assert ccs_out.numRecords > 0
         assert ccs_out.name == "My Data (trimmed)"
         assert ccs_out.tags == "ccs"

Example #28

0

Show file

 def test__read_in_indexed_alignmentset(self):
     """Check the rows returned by ``_read_in_indexed_alignmentset``."""
     rows = _read_in_indexed_alignmentset(pbtestdata.get_file("aligned-bam"))
     # third field of every row must be 254
     third_fields = [row[2] for row in rows]
     self.assertTrue(all(value == 254 for value in third_fields))
     self.assertEqual(len(rows), 112)
     last_row = rows[-1]
     self.assertEqual(last_row[0], 605)
     self.assertTrue(0.927 < last_row[1] < 0.928)

Example #29

0

Show file

File: test_pbreports_plot_rainbow.py Project: mdsmith/pbreports

 def test__read_in_indexed_alignmentset(self):
     """Read the aligned test BAM and check row count, constants and last row."""
     bam_path = pbtestdata.get_file("aligned-bam")
     records = _read_in_indexed_alignmentset(bam_path)
     matches = [record[2] == 254 for record in records]
     self.assertTrue(all(matches))
     self.assertEqual(len(records), 112)
     final = records[-1]
     self.assertEqual(final[0], 605)
     self.assertTrue(0.927 < final[1] < 0.928)

Example #30

0

Show file

File: test_pbreports_report_mapping_stats.py Project: MShaffar19/pbreports

class TestIntegrationMappingStatsReport(unittest.TestCase):
    """Run the mapping stats command on the aligned BAM and validate its JSON."""
    ALIGNMENTS = pbtestdata.get_file("aligned-bam")

    def setUp(self):
        """Create an output directory and reserve a path for the report JSON."""
        self.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
        self.aligned_reads_bam = self.ALIGNMENTS
        # delete=False keeps the (closed) file on disk for the command to use
        with tempfile.NamedTemporaryFile(
                delete=False, suffix="mapping_report.json") as handle:
            self.report_json = handle.name

    def test_basic(self):
        """Run the command, load the JSON report and validate it."""
        command = _to_cmd(self.ALIGNMENTS, self.report_json)
        self.assertEqual(run_backticks(command), 0)

        with open(self.report_json, 'r') as json_in:
            report_dict = json.load(json_in)
            log.info("JsonReport: ")
            log.info(pprint.pformat(report_dict, indent=4))

        report = dict_to_report(report_dict)
        self.assertIsNotNone(report)
        self.assertEqual(len(report.tables), 1)
        log.info(str(report.tables[0]))
        validate_report_metadata(self, report, spec)
        validate_report_complete(self, report)

Example #31

0

Show file

File: test_file_utils.py Project: MShaffar19/pbcoretools

 def test_update_barcoded_sample_metadata(self):
     """Exercise ``update_barcoded_sample_metadata`` in three configurations.

     Covers the default call, ``use_barcode_uuids=False``, and a dataset
     whose collection metadata has been set to ``None``.
     """
     subreads = self.SUBREADS
     datastore_json = tempfile.NamedTemporaryFile(
         suffix=".datastore.json").name
     barcodes = pbtestdata.get_file("barcodeset")

     split_barcoded_dataset(subreads).write_json(datastore_json)
     work_dir = tempfile.mkdtemp()
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, subreads, barcodes)
     validate_barcoded_datastore_files(self, subreads, updated)

     # now with use_barcode_uuids=False
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, subreads, barcodes,
         use_barcode_uuids=False)
     validate_barcoded_datastore_files(
         self, subreads, updated, use_barcode_uuids=False)

     # test that it doesn't break with no collection metadata
     stripped = SubreadSet(subreads)
     stripped.metadata.collections = None
     stripped_xml = tempfile.NamedTemporaryFile(
         suffix=".subreadset.xml").name
     stripped.write(stripped_xml)
     split_barcoded_dataset(stripped_xml).write_json(datastore_json)
     work_dir = tempfile.mkdtemp()
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, subreads, barcodes)
     validate_barcoded_datastore_files(
         self, subreads, updated,
         have_collection_metadata=False,
         number_of_expected_collections=0)

Example #32

0

Show file

 def test_integration_simple(self):
     """Run ``consolidate_reads_bam`` and check that ``reads.bam`` exists."""
     module = "pbcoretools.tasks.consolidate_reads_bam"
     dataset_xml = pbtestdata.get_file("ccs-sequel")
     self._check_call(["python3", "-m", module, dataset_xml])
     assert op.isfile("reads.bam")

Example #33

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: PacificBiosciences/pbreports

    def test_make_filter_stats_report_sts_xml(self):
        """
        Test the content of the filter report generated from an sts.xml
        """
        sts_xml = pbtestdata.get_file("stats-xml")
        rpt = make_filter_report(sts_xml, self.get_output_dir())
        d = json.loads(rpt.to_json())
        self._compare_attribute_values(
            report_d=d,
            expected_d={
                Constants.A_NBASES: 1672335649,
                Constants.A_NREADS: 394658,
                Constants.A_READ_N50: 7750,
                Constants.A_READ_LENGTH: 4237,
            },
        )
        # the report is expected to have written its read-length plot
        self.assertTrue(os.path.exists(os.path.join(self.get_output_dir(), "readLenDist0.png")))
        # self.assertTrue(os.path.exists(os.path.join(
        #    self.get_output_dir(),
        #    'readQualDist0.png')))

        # these are from a raw STS file
        self.assertEqual(len(rpt._dataset_uuids), 0, "Incorrect report datasets uuids")
        # print() with one argument prints the same text on Python 2 and 3;
        # the old print statement is a syntax error on Python 3.
        print(pformat(rpt.to_dict()))
        validate_report_complete(self, rpt)

Example #34

0

Show file

File: test_pbdataset_subtypes.py Project: PacificBiosciences/pbcore

 def test_provenance_record_ordering(self):
     """Check the order of metadata child tags after a write/read round trip.

     A parent dataset is added to the metadata before the dataset is written
     to a temporary file and reopened.
     """
     import pbtestdata
     original = SubreadSet(pbtestdata.get_file("subreads-sequel"), strict=True)
     original.metadata.addParentDataSet(
         uuid.uuid4(),
         original.datasetType,
         createdBy="AnalysisJob",
         timeStampedName="")
     tmp_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     original.write(tmp_out)

     reloaded = SubreadSet(tmp_out, strict=True)
     expected_order = [
         'TotalLength', 'NumRecords', 'Provenance', 'Collections',
         'SummaryStats',
     ]
     child_tags = [child['tag'] for child in reloaded.metadata.record['children']]
     self.assertEqual(child_tags, expected_order)

Example #35

0

Show file

File: test_tool_contract.py Project: PacificBiosciences/unanimity

 def setUp(self):
     """Split the subreads BAM in two and write one chunk as the test input.

     The chunk at ``CHUNK_INDEX`` is written to ``self.INPUT_FILES[0]`` and
     part of its first ZMW range is stored on ``self.zmw_range``.
     """
     subreads_bam = pbtestdata.get_file("subreads-bam")
     dataset = SubreadSet(subreads_bam, strict=True)
     chunks = dataset.split(zmws=True, chunks=2, targetSize=2)
     assert len(chunks) == 2

     selected = chunks[CHUNK_INDEX]
     target_xml = self.INPUT_FILES[0]
     # elements 1 and 2 of the first ZMW range entry
     self.zmw_range = selected.zmwRanges[0][1:3]
     logging.info("zmwRanges[CHUNK_INDEX] = {r}".format(
         r=str(selected.zmwRanges)))
     logging.info("SubreadSet = {f}".format(f=target_xml))
     selected.write(target_xml)

Example #36

0

Show file

File: test_pbreports_report_ccs.py Project: PacificBiosciences/pbreports

 def test_ccs_barcodes_table(self):
     """Check the column values of the report's second table."""
     dataset = ConsensusReadSet(pbtestdata.get_file("ccs-barcoded"))
     report = to_report(dataset, tempfile.mkdtemp())

     expected_leading = [
         ["lbc1", "lbc3"],
         [1, 1],
         [1958, 1954],
         [1958, 1954],
     ]
     leading = [col.values for col in report.tables[1].columns[0:4]]
     self.assertEqual(leading, expected_leading)

     # the fifth column holds floats, compared to four decimal places
     for row, expected in enumerate((0.9724, 0.9926)):
         self.assertAlmostEqual(
             report.tables[1].columns[4].values[row], expected, places=4)

Example #37

0

Show file

File: test_pbreports_summarize_coverage.py Project: PacificBiosciences/pbreports

    def test_exit_code_0(self):
        """
        Like a cram test. Assert exits with 0, even though region size is 0 See
        bug 25079
        """
        from pbcore.util.Process import backticks
        import tempfile
        ref = pbtestdata.get_file("lambda-fasta")
        tiny_reads = pbtestdata.get_file("aligned-xml")
        out = os.path.join(tempfile.mkdtemp(suffix="summ_cov"), 'gff')
        cmd = 'summarize_coverage --region_size=0 --num_regions=500 {a} {r} {g}'.format(
            a=tiny_reads, r=ref, g=out)

        o, c, m = backticks(cmd)
        log.info(cmd)
        # Compare by value: `is not 0` tests object identity, which is an
        # implementation detail for ints and a SyntaxWarning on Python 3.8+.
        if c != 0:
            log.error(m)
            log.error(o)
            print(m)
        # assertEquals is a deprecated alias that was removed in Python 3.12
        self.assertEqual(0, c)
        self.assertTrue(os.path.exists(out))

Example #38

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: PacificBiosciences/pbreports

 def test_adapter_exit_code_0(self):
     """Run ``adapter_xml`` on the "subreads-sequel" test file and expect code 0.

     The three values returned by ``backticks`` are printed for debugging.
     """
     subreads_xml = pbtestdata.get_file("subreads-sequel")
     cmd = "adapter_xml {c} {r}".format(r="foo.json", c=subreads_xml)
     o, c, m = backticks(cmd)
     # print() with one argument prints the same text on Python 2 and 3;
     # the old print statement is a syntax error on Python 3.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value; `is not 0` tests identity, not equality.
     if c != 0:
         log.error(m)
         log.error(o)
     # assertEquals is a deprecated alias that was removed in Python 3.12
     self.assertEqual(0, c)

Example #39

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: PacificBiosciences/pbreports

 def test_loading_exit_code_0(self):
     """Run ``loading_xml`` on the "stats-xml" test file and expect code 0.

     The three values returned by ``backticks`` are printed for debugging.
     """
     sts_xml = pbtestdata.get_file("stats-xml")
     cmd = "loading_xml {c} {r}".format(r="foo.json", c=sts_xml)
     o, c, m = backticks(cmd)
     # print() with one argument prints the same text on Python 2 and 3;
     # the old print statement is a syntax error on Python 3.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value; `is not 0` tests identity, not equality.
     if c != 0:
         log.error(m)
         log.error(o)
     # assertEquals is a deprecated alias that was removed in Python 3.12
     self.assertEqual(0, c)

Example #40

0

Show file

File: test_pbdataset_metadata.py Project: PacificBiosciences/pbcore

 def test_merge_biosamples(self):
     """Check how biosamples and their barcodes merge across SubreadSets."""
     import pbtestdata

     def _sample_names(dataset):
         return [sample.name for sample in dataset.metadata.bioSamples]

     xml_one = pbtestdata.get_file("subreads-biosample-1")
     xml_two = pbtestdata.get_file("subreads-biosample-2")

     # Case 1: two biosamples
     merged = SubreadSet(xml_one, xml_two)
     self.assertEqual(_sample_names(merged), ["Alice", "Bob"])

     # Case 2: same biosample in both files
     merged = SubreadSet(xml_one, xml_one)
     self.assertEqual(_sample_names(merged), ["Alice"])
     self.assertEqual(len(merged.metadata.bioSamples[0].DNABarcodes), 1)

     # Case 3: same biosample, different barcodes
     renamed = SubreadSet(xml_one)
     renamed.metadata.bioSamples[0].DNABarcodes[0].name = "F7--R7"
     renamed_xml = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     renamed.write(renamed_xml)
     merged = SubreadSet(xml_one, renamed_xml)
     self.assertEqual(_sample_names(merged), ["Alice"])
     barcode_names = [
         barcode.name
         for barcode in merged.metadata.bioSamples[0].DNABarcodes
     ]
     self.assertEqual(barcode_names, ["F1--R1", "F7--R7"])

Example #41

0

Show file

File: test_pbreports_report_mapping_stats.py Project: mdsmith/pbreports

    def setUpClass(cls):
        """Build the CCS mapping stats report once and write it to a JSON file.

        If the result is a ``Report``, its dict form and tables are logged.
        """
        cls.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
        cls.aligned_reads_xml = pbtestdata.get_file("rsii-ccs-aligned")
        # delete=False keeps the (closed) file on disk for write_json below
        with tempfile.NamedTemporaryFile(
                delete=False, suffix="mapping_report.json") as handle:
            cls.report_json = handle.name
        cls.report = mapping_stats_ccs.to_report(
            cls.aligned_reads_xml, cls.output_dir)
        cls.report.write_json(cls.report_json)

        if not isinstance(cls.report, Report):
            return
        log.info(pprint.pformat(cls.report.to_dict()))
        for table in cls.report.tables:
            log.info(str(table))

Example #42

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: PacificBiosciences/pbreports

 def test_filter_exit_code_0(self):
     """Run ``filter_stats_xml`` on the "subreads-sequel" test file and expect code 0.

     The three values returned by ``backticks`` are printed for debugging.
     """
     # The unused temp directory and cwd lookup were dropped: the directory
     # was created on every run and never used or removed.
     sts_xml = pbtestdata.get_file("subreads-sequel")
     cmd = "filter_stats_xml {c} {r}".format(r="foo.json", c=sts_xml)
     o, c, m = backticks(cmd)
     # print() with one argument prints the same text on Python 2 and 3;
     # the old print statement is a syntax error on Python 3.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value; `is not 0` tests identity, not equality.
     if c != 0:
         log.error(m)
         log.error(o)
     # assertEquals is a deprecated alias that was removed in Python 3.12
     self.assertEqual(0, c)

Example #43

0

Show file

File: test_pbreports_report_stats_xml_reports.py Project: PacificBiosciences/pbreports

    def test_make_filter_stats_report_dataset(self):
        """
        Test the content of the filter report generated from a dataset
        """
        dataset_xml = pbtestdata.get_file("subreads-sequel")
        report = make_filter_report(dataset_xml, self.get_output_dir())
        report_dict = json.loads(report.to_json())
        expected_attributes = {
            Constants.A_NBASES: 1672335649,
            Constants.A_NREADS: 394658,
            Constants.A_READ_N50: 7750,
            Constants.A_READ_LENGTH: 4237,
        }
        self._compare_attribute_values(
            report_d=report_dict, expected_d=expected_attributes)

        # the report is expected to have written its read-length plot
        plot_path = os.path.join(self.get_output_dir(), "readLenDist0.png")
        self.assertTrue(os.path.exists(plot_path))

Example #44

0

Show file

File: test_tasks_converters.py Project: PacificBiosciences/pbcoretools

def _get_bax2bam_inputs():
    """Return the single-element input list used by the bax2bam test setup.

    When HAVE_DATA_AND_BAX2BAM is true, an HdfSubreadSet is assembled on the
    fly from two bax.h5 files (one under SIV_DATA_DIR, one from pbtestdata)
    so that the test exercises multi-movie input; the dataset is written to
    a temporary ``.hdfsubreadset.xml`` path and that path is returned.
    Otherwise a placeholder path is returned, so that this function can be
    called without the data being present.
    """
    if not HAVE_DATA_AND_BAX2BAM:
        # Assume the test data isn't found and the test won't be run
        return ["/path/to/this-test-should-be-skipped.txt"]

    xml_path = tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml").name

    siv_bax = SIV_DATA_DIR + "/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5"
    testdata_bax = pbtestdata.get_file("rsii-bax-h5")

    dataset = HdfSubreadSet(siv_bax, testdata_bax)
    dataset.name = "lambda_rsii"
    # The two resources must come from two distinct movies.
    assert len({reader.movieName for reader in dataset.resourceReaders()}) == 2
    dataset.write(xml_path)
    return [xml_path]

Example #45

0

Show file

File: test_pbdataset_subtypes.py Project: PacificBiosciences/pbcore

 def test_consensus_read_set_ref(self):
     """Open the "ccs-sequel" ConsensusReadSet in strict mode and check the
     UUID of the consensusReadSetRef on its first collection."""
     import pbtestdata
     ccs_path = pbtestdata.get_file("ccs-sequel")
     dataset = ConsensusReadSet(ccs_path, strict=True)
     first_collection = dataset.metadata.collections[0]
     ref_uuid = first_collection.consensusReadSetRef.uuid
     self.assertEqual(ref_uuid, "5416f525-d3c7-496b-ba8c-18d7ec1b4499")

Example #46

0

Show file

File: test_tasks_scatter_gather.py Project: mdsmith/pbcoretools

 def _generate_chunk_output_file(self, i=None):
     """Pass the "ccs-bam-aligned" test file to ``_copy_mock_output_file``
     and return its result; ``i`` is not used here."""
     source_file = pbtestdata.get_file("ccs-bam-aligned")
     return self._copy_mock_output_file(source_file)

Example #47

0

Show file

File: test_tasks_scatter_gather.py Project: mdsmith/pbcoretools

 def _generate_chunk_output_file(self, i=None):
     """Pass the "subreads-bam" test file to ``_copy_mock_output_file``
     and return its result; ``i`` is not used here."""
     source_file = pbtestdata.get_file("subreads-bam")
     return self._copy_mock_output_file(source_file)

Example #48

0

Show file

File: test_pbreports_report_barcode.py Project: PacificBiosciences/pbreports

 def setUp(self):
     """Point the fixture at the barcode set and the barcoded subread set
     from pbtestdata, with the ``ccs`` flag off."""
     get_file = pbtestdata.get_file
     self.barcodes = get_file("barcodeset")
     self.subreads = get_file("barcoded-subreadset")
     self.ccs = False

Example #49

0

Show file

File: test_bamsieve.py Project: Debian/pbcoretools

import shutil
import os.path as op
import os

from pbcore.io import openDataFile, openDataSet, BamReader

import pbtestdata

from pbcoretools import bamSieve

# Local test data: the "data" directory that sits beside the directory
# containing this module.
DATA_DIR = op.join(op.dirname(op.dirname(__file__)), "data")
# Files shipped in DATA_DIR (BAM files and SubreadSet XMLs, going by their
# file names).
SUBREADS1 = op.join(DATA_DIR, "tst_1_subreads.bam")
DS1 = op.join(DATA_DIR, "tst_1.subreadset.xml")
SUBREADS2 = op.join(DATA_DIR, "tst_3_subreads.bam")
DS2 = op.join(DATA_DIR, "tst_3.subreadset.xml")
# Files resolved by key through pbtestdata.get_file.
SUBREADS3 = pbtestdata.get_file("subreads-bam")
SUBREADS4 = pbtestdata.get_file("aligned-bam")
CCS = pbtestdata.get_file("ccs-bam")
BARCODED = pbtestdata.get_file("barcoded-subreads-bam")
BARCODED_DS = pbtestdata.get_file("barcoded-subreadset")

class TestBamSieve(unittest.TestCase):

    def test_whitelist(self):
        ofn = tempfile.NamedTemporaryFile(suffix=".bam").name
        WHITELIST = set([24962, 32901, 30983])

        def _run_with_whitelist(wl):
            rc = bamSieve.filter_reads(
                input_bam=SUBREADS3,
                output_bam=ofn,

Example #50

0

Show file

File: test_pbreports_summarize_coverage.py Project: PacificBiosciences/pbreports

 def setUp(self):
     """Resolve the alignment XML, alignment-summary GFF and lambda FASTA
     test files from pbtestdata; no reference is selected."""
     get_file = pbtestdata.get_file
     self.aln_path = get_file("aligned-xml")
     self.gff_path = get_file("alignment-summary-gff")
     self.ref_path = get_file("lambda-fasta")
     self.selected_reference = None

Example #51

0

Show file

File: test_pbreports_report_sat.py Project: PacificBiosciences/pbreports

 def getAlignmentSet(self):
     """Return what ``pbtestdata.get_file`` gives for the "aligned-bam" key."""
     aligned_bam = pbtestdata.get_file("aligned-bam")
     return aligned_bam