コード例 #1
0
    def test_dataset_split_multi_movie(self):
        """Split a merged two-input SubreadSet through the ``dataset split`` CLI.

        The two test datasets are merged and written to a temporary XML
        file.  That file is then split by ZMW, once with and once without
        ``--keepReadGroups``, and the record count of every chunk written
        to the output directory is compared to the expected sizes.
        """
        ds1 = pbtestdata.get_file("subreads-sequel")
        ds2 = pbtestdata.get_file("subreads-xml")
        tmp_ds = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        with SubreadSet(ds1, ds2) as ds:
            ds.write(tmp_ds)
        base_args = [
            "dataset", "split", "--maxChunks", "4", "--targetSize", "1",
            "--prefix", "tst_multi_ds"
        ]

        def run_and_validate(args, ds_sizes):
            # Each run gets its own fresh output directory so the directory
            # listing below contains only the chunks of this invocation.
            outdir = tempfile.mkdtemp(suffix="dataset-unittest")
            final_args = base_args + args + ["--outdir", outdir, tmp_ds]
            self._check_cmd(" ".join(final_args))
            dss = [
                openDataSet(op.join(outdir, fn))
                for fn in sorted(os.listdir(outdir))
            ]
            assert [len(chunk) for chunk in dss] == ds_sizes

        run_and_validate(["--zmws"], [52, 22, 42, 21])
        #run_and_validate(["--auto"], [8, 12, 54, 63])
        run_and_validate(["--zmws", "--keepReadGroups"], [8, 12, 54, 63])
コード例 #2
0
 def test_dataset_create_set_sample_names(self):
     """Verify that ``dataset create`` applies the requested sample names.

     The command runs twice with identical sample-name options: first on
     the "subreads-bam" input, then on "barcoded-subreadset" (the case the
     original comment describes as having existing samples).
     """
     name_opts = "--well-sample-name WELLSAMPLE --bio-sample-name BIOSAMPLE".split(
     )

     # first input: plain BAM file
     xml_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     argv = ["dataset", "create", "--force", xml_out]
     argv.append(pbtestdata.get_file("subreads-bam"))
     argv.extend(name_opts)
     self._run_cmd_with_output(" ".join(argv), xml_out)
     with SubreadSet(xml_out) as created:
         assert len(created.metadata.collections) == 1
         well = created.metadata.collections[0].wellSample
         assert well.name == "WELLSAMPLE"
         assert well.bioSamples[0].name == "BIOSAMPLE"
         assert len(well.bioSamples) == 1

     # now with existing samples
     xml_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     argv = ["dataset", "create", "--force", xml_out]
     argv.append(pbtestdata.get_file("barcoded-subreadset"))
     argv.extend(name_opts)
     self._run_cmd_with_output(" ".join(argv), xml_out)
     with SubreadSet(xml_out) as created:
         assert len(created.metadata.collections) == 1
         well = created.metadata.collections[0].wellSample
         assert well.name == "WELLSAMPLE"
         # collapse to a set: only the distinct names matter here
         distinct_names = set()
         for sample in well.bioSamples:
             distinct_names.add(sample.name)
         assert distinct_names == {"BIOSAMPLE"}
コード例 #3
0
 def test_get_dataset_size(self):
     """Check the metrics returned by ``get_dataset_size`` for several inputs.

     Covers the "subreads-sequel" test dataset with both flag settings,
     the class-level BIG_DATA, TINY_REF and BIG_REF inputs, and the
     "aligned-ds-2" test dataset.
     """
     sequel_xml = pbtestdata.get_file("subreads-sequel")

     size = get_dataset_size(sequel_xml, True, True)
     assert size.numRecords == 20
     assert size.totalLengthMb == 1
     assert size.indexSizeGb == 2
     assert size.numResources == 1
     assert size.numFilters == 0

     size = get_dataset_size(sequel_xml, False, False)
     assert size.numRecords == 20
     assert size.totalLengthMb == 1
     assert size.indexSizeGb == 1

     size = get_dataset_size(self.BIG_DATA, True, True)
     assert size.numRecords == 805580876
     assert size.totalLengthMb == 271330
     assert size.indexSizeGb == 45
     assert size.numResources == 1
     assert size.numFilters == 0

     size = get_dataset_size(self.TINY_REF, False, False)
     assert size.numRecords == 1
     assert size.totalLengthMb == 1

     size = get_dataset_size(self.BIG_REF, False, False)
     assert size.numRecords == 86
     assert size.totalLengthMb == 2993

     aligned_xml = pbtestdata.get_file("aligned-ds-2")
     size = get_dataset_size(aligned_xml, True, True)
     assert size.numRecords == 21
     assert size.numResources == 2
 def setUpClass(cls):
     """Open the aligned test dataset once and cache its interval lists.

     Stores the dataset path, the strict ``AlignmentSet`` reader, its
     resource readers and the interval lists built from them on the class.
     """
     cls.xml_path = pbtestdata.get_file("aligned-xml")
     lambda_fasta = pbtestdata.get_file("lambda-fasta")
     cls.ds_reader = AlignmentSet(
         cls.xml_path, strict=True, reference=lambda_fasta)
     readers = cls.ds_reader.resourceReaders()
     cls.bam_readers = readers
     cls.interval_lists = summarize_coverage.build_interval_lists(readers)
コード例 #5
0
 def test_split_zmws_around_read_groups(self):
     """Compare ZMW splitting with ``breakReadGroups`` on and off.

     With ``breakReadGroups=False`` every chunk of the merged dataset must
     contain a single qId; a single-input dataset must yield the same
     chunk sizes for either setting.
     """
     xml_input = pbtestdata.get_file("subreads-xml")
     sequel_input = pbtestdata.get_file("subreads-sequel")
     merged = SubreadSet(xml_input, sequel_input)
     assert len(merged) == 137
     # the two qId values expected in the chunk indices below
     qid_a = -2081539485
     qid_b = -1197849594

     # this is still the default behavior
     parts = list(merged.split(chunks=2, zmws=True, breakReadGroups=True))
     assert len(parts[0]) == 72
     assert len(parts[1]) == 65

     # don't break up movies
     parts = list(merged.split(chunks=2, zmws=True, breakReadGroups=False))
     assert len(parts[0]) == 20
     assert len(parts[1]) == 117
     assert np.all(parts[0].index.qId == qid_a)
     assert np.all(parts[1].index.qId == qid_b)

     parts = list(
         merged.split(chunks=4, targetSize=1, zmws=True,
                      breakReadGroups=False))
     assert [len(part) for part in parts] == [8, 12, 54, 63]
     # the size check above guarantees exactly four chunks
     for part, qid in zip(parts, (qid_a, qid_a, qid_b, qid_b)):
         assert np.all(part.index.qId == qid)

     # control: single-movie dataset
     single = SubreadSet(xml_input)
     kept = list(single.split(chunks=4, zmws=True, breakReadGroups=False))
     broken = list(single.split(chunks=4, zmws=True, breakReadGroups=True))
     assert [len(x) for x in kept] == [len(y) for y in broken]
コード例 #6
0
class TestToolContractHgap(pbcommand.testkit.core.PbTestApp):
    """Tool-contract test configuration for ``pbreports.report.coverage_hgap``.

    The class only declares attributes; how they are consumed is defined by
    the ``PbTestApp`` base class in pbcommand.
    """
    # command line used to invoke the task under test
    DRIVER_BASE = "python -m pbreports.report.coverage_hgap"
    # inputs: the lambda FASTA and the alignment summary GFF test files
    INPUT_FILES = [
        pbtestdata.get_file("lambda-fasta"),
        pbtestdata.get_file("alignment-summary-gff")
    ]
    # NOTE(review): presumably the declared and resolved "distributed"
    # flags of the tool contract - confirm against PbTestApp.
    IS_DISTRIBUTED = True
    RESOLVED_IS_DISTRIBUTED = True
コード例 #7
0
 def test_get_index_size_bytes(self):
     """Check ``get_index_size_bytes`` on the .pbi of three test datasets."""
     import pbtestdata
     # (test file id, expected value for the first external resource)
     expected_sizes = (
         ("subreads-sequel", 580),
         ("ccs-barcoded", 68),
         ("aligned-xml", 7504),
     )
     for file_id, n_bytes in expected_sizes:
         dataset = openDataSet(pbtestdata.get_file(file_id))
         pbi_path = dataset.externalResources[0].pbi
         assert get_index_size_bytes(pbi_path) == n_bytes
コード例 #8
0
 def test_run_bamsieve_extract_unmapped(self):
     """Run ``bamsieve`` with the mapped reads as a blacklist.

     The output BAM must share no reads with the blacklist dataset.
     """
     blacklist_ds = _make_filtered(pbtestdata.get_file("aligned-xml"))
     subreads_xml = pbtestdata.get_file("subreads-xml")
     output_bam = "unmapped.subreads.bam"
     command = ["bamsieve", "--subreads", "--blacklist", blacklist_ds]
     command += [subreads_xml, output_bam]
     self._check_call(command)
     assert_no_reads_in_common(self, blacklist_ds, output_bam)
コード例 #9
0
 def setUpClass(cls):
     """Prepare the shared alignment reader and interval lists for the tests.

     Everything is stored on the class: the dataset path, the strict
     ``AlignmentSet`` opened with the lambda reference, its resource
     readers, and the interval lists derived from those readers.
     """
     aligned_xml = pbtestdata.get_file("aligned-xml")
     cls.xml_path = aligned_xml
     reader_options = {
         "strict": True,
         "reference": pbtestdata.get_file("lambda-fasta"),
     }
     cls.ds_reader = AlignmentSet(aligned_xml, **reader_options)
     cls.bam_readers = cls.ds_reader.resourceReaders()
     build = summarize_coverage.build_interval_lists
     cls.interval_lists = build(cls.bam_readers)
コード例 #10
0
class TestScatterCCSReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter-task test configuration for ``scatter_ccs_reference``.

    Purely declarative; the attributes are interpreted by the
    ``PbTestScatterApp`` base class in pbcommand.
    """
    # command line used to invoke the scatter task
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_ccs_reference"
    # inputs: the "rsii-ccs" and "lambdaNEB" test files
    INPUT_FILES = [
        pbtestdata.get_file("rsii-ccs"),
        pbtestdata.get_file("lambdaNEB")
    ]
    # NOTE(review): presumably the requested and resolved chunk limits -
    # confirm against PbTestScatterApp.
    MAX_NCHUNKS = 8
    RESOLVED_MAX_NCHUNKS = 8
    # chunk keys associated with this scatter task
    CHUNK_KEYS = ("$chunk.ccsset_id", "$chunk.reference_id")
コード例 #11
0
class TestScatterSubreadReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter-task test configuration for ``scatter_subread_reference``.

    Purely declarative; the attributes are interpreted by the
    ``PbTestScatterApp`` base class in pbcommand.
    """
    # command line used to invoke the scatter task
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subread_reference"
    # inputs: the "subreads-xml" and "lambdaNEB" test files
    INPUT_FILES = [
        pbtestdata.get_file("subreads-xml"),
        pbtestdata.get_file("lambdaNEB")
    ]
    # NOTE(review): presumably the requested and resolved chunk limits -
    # confirm against PbTestScatterApp.
    MAX_NCHUNKS = 3
    RESOLVED_MAX_NCHUNKS = 3
    # chunk keys associated with this scatter task
    CHUNK_KEYS = ("$chunk.subreadset_id", "$chunk.reference_id")
コード例 #12
0
class TestSummarizeCoverage(pbcommand.testkit.PbTestApp):
    """Tool-contract test configuration for the summarize_coverage report.

    Purely declarative; the attributes are interpreted by the ``PbTestApp``
    base class in pbcommand.
    """
    # base command; the emit/resolve variants below append one flag each
    DRIVER_BASE = "python -m pbreports.report.summarize_coverage.summarize_coverage "
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # inputs: the aligned dataset XML and the lambda FASTA test files
    INPUT_FILES = [
        pbtestdata.get_file("aligned-xml"),
        pbtestdata.get_file("lambda-fasta")
    ]
    # no task options are set for this test
    TASK_OPTIONS = {}
コード例 #13
0
class TestSummarizeConsensus(pbcommand.testkit.PbTestApp):
    """Tool-contract test configuration for the ``summarizeConsensus`` command.

    Purely declarative; the attributes are interpreted by the ``PbTestApp``
    base class in pbcommand.
    """
    # base command; the emit/resolve variants below append one flag each
    DRIVER_BASE = "summarizeConsensus"
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # inputs: the alignment summary GFF and the variants GFF test files
    INPUT_FILES = [
        pbtestdata.get_file("alignment-summary-gff"),
        pbtestdata.get_file("variants-gff")
    ]
    # no task options are set for this test
    TASK_OPTIONS = {}
コード例 #14
0
 def test_trust_counts(self):
     """Open two aligned datasets with ``trustCounts=True``.

     Record count and total length must be available while the dataset
     has no index loaded and no open resource readers.
     """
     import pbtestdata
     inputs = (
         pbtestdata.get_file("aligned-xml"),
         pbtestdata.get_file("aligned-ds-2"),
     )
     merged = openDataFile(*inputs, trustCounts=True)
     expected_records = 133
     assert merged.numRecords == expected_records
     assert len(merged) == expected_records
     assert merged.totalLength == 274217
     # private state checked directly: nothing has been loaded or opened
     assert merged._index is None
     assert len(merged._openReaders) == 0
コード例 #15
0
class TestScatterSubreadsBarcoding(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter-task test configuration for ``scatter_subreads_bam2bam``.

    Purely declarative; the attributes are interpreted by the
    ``PbTestScatterApp`` base class in pbcommand.
    """
    # command line used to invoke the scatter task
    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subreads_bam2bam"
    INPUT_FILES = [
        # XXX not actually barcoded data, but it doesn't matter here
        pbtestdata.get_file("subreads-bam"),
        pbtestdata.get_file("barcodeset")
    ]
    # NOTE(review): presumably the requested and resolved chunk limits,
    # with NCHUNKS_EXPECTED the number of chunks actually produced -
    # confirm against PbTestScatterApp.
    MAX_NCHUNKS = 8
    RESOLVED_MAX_NCHUNKS = 8
    NCHUNKS_EXPECTED = 2
    # chunk keys associated with this scatter task
    CHUNK_KEYS = ("$chunk.subreadset_id", "$chunk.barcodeset_id")
コード例 #16
0
ファイル: test_utils.py プロジェクト: lqsae/pbcommand
    def test_get_dataset_metadata(self):
        """Check metatype and UUID from ``get_dataset_metadata``.

        The UUID is compared with the one pbcore reports for the same
        file; passing ``None`` must raise an exception.
        """
        import pbtestdata
        subreads_xml = pbtestdata.get_file("subreads-xml")
        metadata = get_dataset_metadata(subreads_xml)
        assert metadata.metatype == "PacBio.DataSet.SubreadSet"

        from pbcore.io import SubreadSet
        reference_ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
        assert metadata.uuid == reference_ds.uuid

        with pytest.raises(Exception) as exc_info:
            get_dataset_metadata(None)
コード例 #17
0
class TestPbalignMinorVariants(pbcommand.testkit.PbTestApp):
    """Tool-contract test for ``pbalign.tasks.align_minorvariants``."""
    DRIVER_BASE = "python -m pbalign.tasks.align_minorvariants"
    INPUT_FILES = [
        pbtestdata.get_file("rsii-ccs"),
        pbtestdata.get_file("lambdaNEB"),
    ]

    def run_after(self, rtc, output_dir):
        """Assert that the first task output opens as a ConsensusAlignmentSet.

        On failure the message is the name of the type actually obtained.
        """
        first_output = rtc.task.output_files[0]
        result = openDataSet(first_output)
        is_expected_type = isinstance(result, ConsensusAlignmentSet)
        self.assertTrue(is_expected_type, type(result).__name__)
コード例 #18
0
 def test_gather_alignments_trust_counts(self):
     """Merge two aligned datasets with ``dataset create --trustCounts``.

     The merged XML is reopened with ``trustCounts=True`` and its record
     count and total length are checked.
     """
     inputs = [
         pbtestdata.get_file("aligned-xml"),
         pbtestdata.get_file("aligned-ds-2"),
     ]
     merged_xml = tempfile.NamedTemporaryFile(
         suffix=".alignmentset.xml").name
     command = ["dataset", "create", "--trustCounts", merged_xml] + inputs
     exit_code = subprocess.check_call(command)
     assert exit_code == 0
     merged = AlignmentSet(merged_xml, trustCounts=True)
     assert merged.numRecords == 133
     assert merged.totalLength == 274217
コード例 #19
0
class TestPbreportMappingStatsHGAP(pbcommand.testkit.PbTestApp):
    """Tool-contract test for ``pbreports.report.mapping_stats_hgap``."""
    DRIVER_BASE = "python -m pbreports.report.mapping_stats_hgap"
    REQUIRES_PBCORE = True
    INPUT_FILES = [
        pbtestdata.get_file("aligned-internal-subreads"),
        pbtestdata.get_file("internal-subreads"),
    ]

    def run_after(self, rtc, output_dir):
        """Check the first attribute of the report written by the task.

        It must have the ``A_PCT_MAPPED`` id and a value within 0.0001
        of 0.9137.
        """
        report_path = rtc.task.output_files[0]
        report = load_report_from_json(report_path)
        first_attr = report.attributes[0]
        self.assertEqual(first_attr.id, Constants.A_PCT_MAPPED)
        self.assertAlmostEqual(first_attr.value, 0.9137, delta=0.0001)
コード例 #20
0
class TestPbreportTopVariants(pbcommand.testkit.PbTestApp):
    """Tool-contract test configuration for ``pbreports.report.top_variants``.

    Purely declarative; the attributes are interpreted by the ``PbTestApp``
    base class in pbcommand.
    """
    # Imported in the class body, so ``Constants`` is bound as a class
    # attribute and is available to the TASK_OPTIONS expression below.
    from pbreports.report.top_variants import Constants
    # base command; the emit/resolve variants below append one flag each
    DRIVER_BASE = "python -m pbreports.report.top_variants "
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # inputs: the variants GFF and the lambda FASTA test files
    INPUT_FILES = [
        pbtestdata.get_file("variants-gff"),
        pbtestdata.get_file("lambda-fasta")
    ]
    # every option is explicitly set to the default exported by the report
    TASK_OPTIONS = {
        Constants.HOW_MANY_ID: Constants.HOW_MANY_DEFAULT,
        Constants.BATCH_SORT_SIZE_ID: Constants.BATCH_SORT_SIZE_DEFAULT,
    }
コード例 #21
0
class TestEstimateLimaMemory(PbIntegrationBase):
    """Tests for ``estimate_lima_memory``, called directly and as a module.

    The BIG_* and CCS_DATA constants are absolute filesystem paths, so the
    tests that use them need those files to be present.
    """
    TINY_DATA = pbtestdata.get_file("subreads-sequel")
    TINY_BARCODES = pbtestdata.get_file("barcodeset")
    BIG_BARCODES = "/pbi/dept/secondary/siv/barcodes/Sequel_RSII_384_barcodes_v1/Sequel_RSII_384_barcodes_v1.barcodeset.xml"
    BIG_DATA = "/pbi/dept/secondary/siv/testdata/Spider/all4mers/rSPOC1_20180629_223342/1_A01/mSPOC1_180629_223410.subreadset.xml"
    CCS_DATA = "/pbi/dept/secondary/siv/testdata/SA3-Sequel/bcol/m54119_161211_175055.consensusreadset.xml"

    def test_estimate_lima_memory(self):
        """Call the estimator directly on three barcode/data combinations."""
        tiny_gb = estimate_lima_memory(
            self.TINY_BARCODES, self.TINY_DATA, True)
        assert tiny_gb == 2
        # this is silly of course.  but it's technically possible with the
        # Sequel II system, so we might as well just deal with it
        huge_gb = estimate_lima_memory(
            self.BIG_BARCODES, self.BIG_DATA, False)
        assert huge_gb == 2752
        # this is a more realistic case - 147K barcode pairs but the BAM file
        # is small enough to fit in the default footprint
        ccs_gb = estimate_lima_memory(
            self.BIG_BARCODES, self.CCS_DATA, False)
        assert ccs_gb == 7

    def test_integration_tiny(self):
        """Run the module with ``--symmetric`` on the tiny inputs."""
        command = ["python3", "-m"]
        command.append("pbcoretools.tasks.memory.estimate_lima_memory")
        command += [self.TINY_BARCODES, self.TINY_DATA, "--symmetric"]
        self._check_call(command)
        with open("lima_mem_gb.txt") as result_file:
            written = result_file.read()
        assert written == "2"

    def test_integration_big(self):
        """Run the module with ``--asymmetric`` on the big inputs."""
        command = ["python3", "-m"]
        command.append("pbcoretools.tasks.memory.estimate_lima_memory")
        command += [self.BIG_BARCODES, self.BIG_DATA, "--asymmetric"]
        self._check_call(command)
        with open("lima_mem_gb.txt") as result_file:
            written = result_file.read()
        assert written == "2752"

    def test_defined_biosamples(self):
        """Estimate memory for a dataset with 384 bio samples assigned."""
        # XXX awful dependency but it makes testing easier
        from pbcoretools.file_utils import set_bio_samples
        tmp_xml = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        # NOTE(review): this dataset is opened but never used afterwards
        barcode_ds = openDataSet(self.BIG_BARCODES)
        sample_pairs = [
            ("bc1001--bc1{:03d}".format(idx), "Sample {}".format(idx))
            for idx in range(384)
        ]
        with openDataSet(self.BIG_DATA, trustCounts=True) as subreads:
            set_bio_samples(subreads, sample_pairs)
            subreads.write(tmp_xml)
        required_gb = estimate_lima_memory(self.BIG_BARCODES, tmp_xml, False)
        assert required_gb == 2
コード例 #22
0
ファイル: test_utils.py プロジェクト: mpkocher/pbcommand
 def test_get_dataset_metadata(self):
     """Check metatype and UUID from ``get_dataset_metadata``.

     The test is skipped when ``pbtestdata`` is unavailable; the UUID
     comparison is skipped when ``pbcore`` is unavailable.
     """
     try:
         import pbtestdata
     except ImportError:
         raise unittest.SkipTest("pbtestdata not available, skipping")

     subreads_xml = pbtestdata.get_file("subreads-xml")
     metadata = get_dataset_metadata(subreads_xml)
     self.assertEqual(metadata.metatype, "PacBio.DataSet.SubreadSet")

     try:
         from pbcore.io import SubreadSet
     except ImportError:
         raise unittest.SkipTest("pbcore not available, skipping")

     reference_ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
     self.assertEqual(metadata.uuid, reference_ds.uuid)
コード例 #23
0
class TestPbalign(pbcommand.testkit.PbTestApp):
    """Tool-contract test for the ``pbalign`` command."""
    DRIVER_BASE = "pbalign "
    REQUIRES_PBCORE = True
    INPUT_FILES = [
        pbtestdata.get_file("subreads-xml"),
        pbtestdata.get_file("lambdaNEB"),
    ]
    TASK_OPTIONS = {
        "pbalign.task_options.algorithm_options": "--holeNumbers 1-1000,30000-30500,60000-60600,100000-100500",
    }

    def run_after(self, rtc, output_dir):
        """Assert that the first task output opens as an AlignmentSet.

        On failure the message is the name of the type actually obtained.
        """
        first_output = rtc.task.output_files[0]
        result = openDataSet(first_output)
        is_alignment_set = isinstance(result, AlignmentSet)
        self.assertTrue(is_alignment_set, type(result).__name__)
コード例 #24
0
class TestToolContract(pbcommand.testkit.PbTestApp):
    """Tool-contract test configuration for ``pbreports.report.variants``.

    Purely declarative; the attributes are interpreted by the ``PbTestApp``
    base class in pbcommand.
    """
    # "variants" sub-directory of the module-level LOCAL_DATA directory
    DATA_DIR = op.join(LOCAL_DATA, "variants")
    # base command; the emit/resolve variants below append one flag each
    DRIVER_BASE = "python -m pbreports.report.variants"
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # inputs: lambda FASTA, consensus summary GFF and variants GFF test files
    INPUT_FILES = [
        pbtestdata.get_file("lambda-fasta"),
        pbtestdata.get_file("consensus-summary-gff"),
        pbtestdata.get_file("variants-gff")
    ]
    # task options passed to the report, keyed by option id
    TASK_OPTIONS = {
        "pbreports.task_options.max_contigs": 25,
        "pbreports.task_options.dpi": 60,
        "pbreports.task_options.dumpdata": True,
    }
コード例 #25
0
    def test_make_filter_stats_report_sts_xml(self):
        """
        Test the content of the filter report generated from an sts.xml

        Checks the report attribute values, that the read-length plot was
        written to the output directory, and that the report carries no
        dataset UUIDs.
        """
        sts_xml = pbtestdata.get_file("stats-xml")
        rpt = make_filter_report(sts_xml, self.get_output_dir())
        d = json.loads(rpt.to_json())
        self._compare_attribute_values(report_d=d,
                                       expected_d={
                                           Constants.A_NBASES: 1672335649,
                                           Constants.A_NREADS: 394658,
                                           Constants.A_READ_N50: 7750,
                                           Constants.A_READ_LENGTH: 4237
                                       })
        self.assertTrue(
            os.path.exists(
                os.path.join(self.get_output_dir(), 'readLenDist0.png')))
        # self.assertTrue(os.path.exists(os.path.join(
        #    self.get_output_dir(),
        #    'readQualDist0.png')))

        # these are from a raw STS file
        self.assertEqual(len(rpt._dataset_uuids), 0,
                         "Incorrect report datasets uuids")
        # Call syntax with a single argument prints the same text on
        # Python 2 and is valid on Python 3, unlike the print statement.
        print(pformat(rpt.to_dict()))
        validate_report_complete(self, rpt)
コード例 #26
0
 def setup_class(cls):
     """Write ``cls.INPUT_FILE`` from the BAMs of the barcoded SubreadSet.

     The BAM path of every external resource is collected, and a new
     strict SubreadSet built from those paths is written out.
     """
     barcoded_xml = pbtestdata.get_file("barcoded-subreadset")
     with SubreadSet(barcoded_xml) as source:
         bam_paths = [res.bam for res in source.externalResources]
     with SubreadSet(*bam_paths, strict=True) as rebuilt:
         rebuilt.write(cls.INPUT_FILE)
コード例 #27
0
 def test_integration(self):
     """Run ``make_trimmed_dataset`` on a datastore plus a CCS input.

     The barcoded CCS test dataset is written twice under different names:
     as the task input and as the file the datastore refers to.  The
     resulting "trimmed.consensusreadset.xml" must contain records and
     carry the expected name and tags.
     """
     barcoded_xml = pbtestdata.get_file("ccs-barcoded")
     datastore_json = tempfile.NamedTemporaryFile(
         suffix=".datastore.json").name
     lima_xml = tempfile.NamedTemporaryFile(
         suffix=".consensusreadset.xml").name
     input_xml = tempfile.NamedTemporaryFile(
         suffix=".consensusreadset.xml").name

     with ConsensusReadSet(barcoded_xml) as template:
         template.name = "My Data (filtered)"
         template.tags = "ccs,filtered"
         template.write(input_xml)
         template.name = "lima out"
         template.write(lima_xml)

     lima_file = DataStoreFile(uuid.uuid4(), "lima",
                               FileTypes.DS_CCS.file_type_id, lima_xml)
     DataStore([lima_file]).write_json(datastore_json)

     command = ["python3", "-m", "pbcoretools.tasks.make_trimmed_dataset"]
     command.extend([datastore_json, input_xml])
     self._check_call(command)

     with ConsensusReadSet("trimmed.consensusreadset.xml",
                           trustCounts=True) as trimmed:
         assert trimmed.numRecords > 0
         assert trimmed.name == "My Data (trimmed)"
         assert trimmed.tags == "ccs"
コード例 #28
0
 def test__read_in_indexed_alignmentset(self):
     """Check the rows ``_read_in_indexed_alignmentset`` returns for a BAM.

     Every row must hold 254 at index 2, there must be 112 rows, and the
     last row must start with 605 followed by a value in (0.927, 0.928).
     """
     aligned_bam = pbtestdata.get_file("aligned-bam")
     rows = _read_in_indexed_alignmentset(aligned_bam)
     self.assertTrue(all(entry[2] == 254 for entry in rows))
     self.assertEqual(len(rows), 112)
     final_row = rows[-1]
     self.assertEqual(final_row[0], 605)
     self.assertTrue(0.927 < final_row[1] < 0.928)
コード例 #29
0
 def test__read_in_indexed_alignmentset(self):
     """Validate the data read from the aligned BAM test file.

     Expects 112 rows that all have 254 in their third field; the last
     row must be 605 followed by a value strictly between 0.927 and 0.928.
     """
     records = _read_in_indexed_alignmentset(
         pbtestdata.get_file("aligned-bam"))
     third_fields = [record[2] for record in records]
     self.assertTrue(all(value == 254 for value in third_fields))
     self.assertEqual(len(records), 112)
     self.assertEqual(records[-1][0], 605)
     self.assertTrue(0.927 < records[-1][1] < 0.928)
コード例 #30
0
class TestIntegrationMappingStatsReport(unittest.TestCase):
    """Run the mapping stats report command and validate its JSON output."""
    ALIGNMENTS = pbtestdata.get_file("aligned-bam")

    def setUp(self):
        """Create an output directory and reserve a report JSON path."""
        self.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
        self.aligned_reads_bam = self.ALIGNMENTS
        # delete=False keeps the file after closing so its path stays valid
        with tempfile.NamedTemporaryFile(
                delete=False, suffix="mapping_report.json") as handle:
            self.report_json = handle.name

    def test_basic(self):
        """The command exits with 0 and writes a complete one-table report."""
        command = _to_cmd(self.ALIGNMENTS, self.report_json)
        exit_code = run_backticks(command)
        self.assertEqual(exit_code, 0)
        with open(self.report_json, 'r') as json_in:
            report_dict = json.load(json_in)
        log.info("JsonReport: ")
        log.info(pprint.pformat(report_dict, indent=4))
        report = dict_to_report(report_dict)
        self.assertIsNotNone(report)
        self.assertEqual(len(report.tables), 1)
        log.info(str(report.tables[0]))
        validate_report_metadata(self, report, spec)
        validate_report_complete(self, report)
コード例 #31
0
 def test_update_barcoded_sample_metadata(self):
     """Exercise ``update_barcoded_sample_metadata`` in three configurations.

     1. default arguments;
     2. ``use_barcode_uuids=False``;
     3. a datastore split from a copy of the input whose collection
        metadata was removed.
     """
     datastore_json = tempfile.NamedTemporaryFile(
         suffix=".datastore.json").name
     barcodes_xml = pbtestdata.get_file("barcodeset")
     split_barcoded_dataset(self.SUBREADS).write_json(datastore_json)
     work_dir = tempfile.mkdtemp()

     # default behavior
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, self.SUBREADS, barcodes_xml)
     validate_barcoded_datastore_files(self, self.SUBREADS, updated)

     # now with use_barcode_uuids=False
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, self.SUBREADS, barcodes_xml,
         use_barcode_uuids=False)
     validate_barcoded_datastore_files(
         self, self.SUBREADS, updated, use_barcode_uuids=False)

     # test that it doesn't break with no collection metadata
     stripped = SubreadSet(self.SUBREADS)
     stripped.metadata.collections = None
     stripped_xml = tempfile.NamedTemporaryFile(
         suffix=".subreadset.xml").name
     stripped.write(stripped_xml)
     split_barcoded_dataset(stripped_xml).write_json(datastore_json)
     work_dir = tempfile.mkdtemp()
     # NOTE(review): the original dataset (not the stripped copy) is what
     # gets passed below, as in the original code - confirm it is intended.
     updated = update_barcoded_sample_metadata(
         work_dir, datastore_json, self.SUBREADS, barcodes_xml)
     validate_barcoded_datastore_files(
         self, self.SUBREADS, updated,
         have_collection_metadata=False,
         number_of_expected_collections=0)
コード例 #32
0
 def test_integration_simple(self):
     """Run ``consolidate_reads_bam`` on the CCS test dataset.

     Afterwards a file named "reads.bam" must exist in the working
     directory.
     """
     command = ["python3", "-m", "pbcoretools.tasks.consolidate_reads_bam"]
     command.append(pbtestdata.get_file("ccs-sequel"))
     self._check_call(command)
     assert op.isfile("reads.bam")
    def test_make_filter_stats_report_sts_xml(self):
        """
        Test the content of the filter report generated from an sts.xml

        Checks the report attribute values, that the read-length plot was
        written to the output directory, and that the report carries no
        dataset UUIDs.
        """
        sts_xml = pbtestdata.get_file("stats-xml")
        rpt = make_filter_report(sts_xml, self.get_output_dir())
        d = json.loads(rpt.to_json())
        self._compare_attribute_values(
            report_d=d,
            expected_d={
                Constants.A_NBASES: 1672335649,
                Constants.A_NREADS: 394658,
                Constants.A_READ_N50: 7750,
                Constants.A_READ_LENGTH: 4237,
            },
        )
        self.assertTrue(os.path.exists(os.path.join(self.get_output_dir(), "readLenDist0.png")))
        # self.assertTrue(os.path.exists(os.path.join(
        #    self.get_output_dir(),
        #    'readQualDist0.png')))

        # these are from a raw STS file
        self.assertEqual(len(rpt._dataset_uuids), 0, "Incorrect report datasets uuids")
        # Call syntax with a single argument prints the same text on
        # Python 2 and is valid on Python 3, unlike the print statement.
        print(pformat(rpt.to_dict()))
        validate_report_complete(self, rpt)
コード例 #34
0
 def test_provenance_record_ordering(self):
     """Check the order of metadata child records after a write/read cycle.

     A parent dataset is added to the metadata, the dataset is written
     and reopened in strict mode, and the child tags are compared to the
     expected sequence.
     """
     import pbtestdata
     subreads = SubreadSet(pbtestdata.get_file("subreads-sequel"),
                           strict=True)
     subreads.metadata.addParentDataSet(
         uuid.uuid4(),
         subreads.datasetType,
         createdBy="AnalysisJob",
         timeStampedName="")
     xml_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     subreads.write(xml_out)
     reloaded = SubreadSet(xml_out, strict=True)
     child_tags = [
         child['tag'] for child in reloaded.metadata.record['children']
     ]
     expected_tags = ['TotalLength', 'NumRecords', 'Provenance',
                      'Collections', 'SummaryStats']
     self.assertEqual(child_tags, expected_tags)
コード例 #35
0
 def setUp(self):
     """Write one ZMW chunk of the subreads BAM as the test input file.

     The BAM is split into two ZMW chunks; the chunk selected by the
     module-level ``CHUNK_INDEX`` is written to ``self.INPUT_FILES[0]``
     and part of its first ZMW range is kept in ``self.zmw_range``.
     """
     bam_path = pbtestdata.get_file("subreads-bam")
     subreads = SubreadSet(bam_path, strict=True)
     chunks = subreads.split(zmws=True, chunks=2, targetSize=2)
     assert len(chunks) == 2
     selected = chunks[CHUNK_INDEX]
     # elements 1 and 2 of the first range tuple
     self.zmw_range = selected.zmwRanges[0][1:3]
     ranges_text = str(selected.zmwRanges)
     logging.info("zmwRanges[CHUNK_INDEX] = {r}".format(r=ranges_text))
     target_xml = self.INPUT_FILES[0]
     logging.info("SubreadSet = {f}".format(f=target_xml))
     selected.write(target_xml)
コード例 #36
0
 def test_ccs_barcodes_table(self):
     """Check the second table of the report for the barcoded CCS dataset.

     The first four columns are compared exactly; the two values of the
     fifth column are compared to four decimal places.
     """
     ccs_xml = pbtestdata.get_file("ccs-barcoded")
     report = to_report(ConsensusReadSet(ccs_xml), tempfile.mkdtemp())
     columns = report.tables[1].columns
     leading_values = [column.values for column in columns[0:4]]
     expected_leading = [
         ["lbc1", "lbc3"],
         [1, 1],
         [1958, 1954],
         [1958, 1954],
     ]
     self.assertEqual(leading_values, expected_leading)
     for row, expected in enumerate((0.9724, 0.9926)):
         self.assertAlmostEqual(columns[4].values[row], expected, places=4)
コード例 #37
0
    def test_exit_code_0(self):
        """
        Like a cram test. Assert exits with 0, even though region size is 0 See
        bug 25079

        Runs ``summarize_coverage`` through ``backticks`` and checks that
        the exit code is 0 and that the output path exists.
        """
        from pbcore.util.Process import backticks
        import tempfile
        ref = pbtestdata.get_file("lambda-fasta")
        tiny_reads = pbtestdata.get_file("aligned-xml")
        out = os.path.join(tempfile.mkdtemp(suffix="summ_cov"), 'gff')
        cmd = 'summarize_coverage --region_size=0 --num_regions=500 {a} {r} {g}'.format(
            a=tiny_reads, r=ref, g=out)

        o, c, m = backticks(cmd)
        log.info(cmd)
        # Compare by value: ``is not 0`` tests object identity, which is an
        # implementation detail for integers.
        if c != 0:
            log.error(m)
            log.error(o)
            print(m)
        self.assertEqual(0, c)
        self.assertTrue(os.path.exists(out))
 def test_adapter_exit_code_0(self):
     """Run ``adapter_xml`` on the Sequel subreads and expect exit code 0.

     The command, its output, exit code and error message are printed;
     output and error message are also logged as errors on failure.
     """
     subreads_xml = pbtestdata.get_file("subreads-sequel")
     cmd = "adapter_xml {c} {r}".format(r="foo.json", c=subreads_xml)
     o, c, m = backticks(cmd)
     # Single-argument print calls produce the same text on Python 2 and
     # are valid on Python 3, unlike the print statement.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value: ``is not 0`` tests object identity.
     if c != 0:
         log.error(m)
         log.error(o)
     self.assertEqual(0, c)
 def test_loading_exit_code_0(self):
     """Run ``loading_xml`` on the stats XML and expect exit code 0.

     The command, its output, exit code and error message are printed;
     output and error message are also logged as errors on failure.
     """
     sts_xml = pbtestdata.get_file("stats-xml")
     cmd = "loading_xml {c} {r}".format(r="foo.json", c=sts_xml)
     o, c, m = backticks(cmd)
     # Single-argument print calls produce the same text on Python 2 and
     # are valid on Python 3, unlike the print statement.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value: ``is not 0`` tests object identity.
     if c != 0:
         log.error(m)
         log.error(o)
     self.assertEqual(0, c)
コード例 #40
0
 def test_merge_biosamples(self):
     """Check how bio samples are merged when SubreadSets are combined.

     Three cases: two different bio samples, the same file given twice,
     and the same bio sample carrying different barcode names.
     """
     import pbtestdata

     def sample_names(dataset):
         # names of all bio samples in the dataset metadata, in order
         return [sample.name for sample in dataset.metadata.bioSamples]

     alice_xml = pbtestdata.get_file("subreads-biosample-1")
     bob_xml = pbtestdata.get_file("subreads-biosample-2")

     # Case 1: two biosamples
     merged = SubreadSet(alice_xml, bob_xml)
     self.assertEqual(sample_names(merged), ["Alice", "Bob"])

     # Case 2: same biosample in both files
     merged = SubreadSet(alice_xml, alice_xml)
     self.assertEqual(sample_names(merged), ["Alice"])
     self.assertEqual(len(merged.metadata.bioSamples[0].DNABarcodes), 1)

     # Case 3: same biosample, different barcodes
     renamed = SubreadSet(alice_xml)
     renamed.metadata.bioSamples[0].DNABarcodes[0].name = "F7--R7"
     renamed_xml = tempfile.NamedTemporaryFile(
         suffix=".subreadset.xml").name
     renamed.write(renamed_xml)
     merged = SubreadSet(alice_xml, renamed_xml)
     self.assertEqual(sample_names(merged), ["Alice"])
     barcode_names = [
         barcode.name
         for barcode in merged.metadata.bioSamples[0].DNABarcodes
     ]
     self.assertEqual(barcode_names, ["F1--R1", "F7--R7"])
コード例 #41
0
    def setUpClass(cls):
        """Build the CCS mapping stats report once and write it as JSON.

        Output directory, input XML, JSON path and the report object are
        stored on the class.  If the report is a ``Report`` instance, its
        dictionary form and every table are logged.
        """
        cls.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
        cls.aligned_reads_xml = pbtestdata.get_file("rsii-ccs-aligned")
        # delete=False keeps the file after closing so its path stays valid
        with tempfile.NamedTemporaryFile(
                delete=False, suffix="mapping_report.json") as handle:
            cls.report_json = handle.name
        cls.report = mapping_stats_ccs.to_report(cls.aligned_reads_xml,
                                                 cls.output_dir)
        cls.report.write_json(cls.report_json)

        if not isinstance(cls.report, Report):
            return
        log.info(pprint.pformat(cls.report.to_dict()))
        for report_table in cls.report.tables:
            log.info(str(report_table))
 def test_filter_exit_code_0(self):
     """Run ``filter_stats_xml`` on the Sequel subreads; expect exit code 0.

     The command, its output, exit code and error message are printed;
     output and error message are also logged as errors on failure.
     """
     subreads_xml = pbtestdata.get_file("subreads-sequel")
     cmd = "filter_stats_xml {c} {r}".format(r="foo.json", c=subreads_xml)
     o, c, m = backticks(cmd)
     # Single-argument print calls produce the same text on Python 2 and
     # are valid on Python 3, unlike the print statement.
     print("COMMAND: {c}".format(c=cmd))
     log.info(cmd)
     print("o: {o}".format(o=o))
     print("c: {c}".format(c=c))
     print("m: {m}".format(m=m))
     # Compare by value: ``is not 0`` tests object identity.
     if c != 0:
         log.error(m)
         log.error(o)
     self.assertEqual(0, c)
    def test_make_filter_stats_report_dataset(self):
        """
        Test the content of the filter report generated from a dataset

        Checks the report attribute values and that the read-length plot
        was written to the output directory.
        """
        dataset_xml = pbtestdata.get_file("subreads-sequel")
        report = make_filter_report(dataset_xml, self.get_output_dir())
        report_dict = json.loads(report.to_json())
        expected_attributes = {
            Constants.A_NBASES: 1672335649,
            Constants.A_NREADS: 394658,
            Constants.A_READ_N50: 7750,
            Constants.A_READ_LENGTH: 4237,
        }
        self._compare_attribute_values(report_d=report_dict,
                                       expected_d=expected_attributes)

        plot_path = os.path.join(self.get_output_dir(), "readLenDist0.png")
        self.assertTrue(os.path.exists(plot_path))
コード例 #44
0
def _get_bax2bam_inputs():
    """Return the input file list for the bax2bam tests.

    Computed in a function so the inputs are available without relying on
    class setup when the test is skipped.

    To test multi-movie input, an HdfSubreadSet is built on the fly from
    two bax files and written to a temporary XML file, which is returned
    as a one-element list.  When ``HAVE_DATA_AND_BAX2BAM`` is false a
    placeholder path is returned instead.
    """
    if not HAVE_DATA_AND_BAX2BAM:
        # Assume the test data isn't found and the test won't be run
        return ["/path/to/this-test-should-be-skipped.txt"]

    xml_path = tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml").name
    siv_bax = SIV_DATA_DIR + "/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5"
    packaged_bax = pbtestdata.get_file("rsii-bax-h5")
    hdf_set = HdfSubreadSet(siv_bax, packaged_bax)
    hdf_set.name = "lambda_rsii"
    # the two files must come from two different movies
    movie_names = {reader.movieName for reader in hdf_set.resourceReaders()}
    assert len(movie_names) == 2
    hdf_set.write(xml_path)
    return [xml_path]
コード例 #45
0
 def test_consensus_read_set_ref(self):
     """Check the ConsensusReadSetRef UUID of the first collection."""
     import pbtestdata
     ccs_xml = pbtestdata.get_file("ccs-sequel")
     ccs = ConsensusReadSet(ccs_xml, strict=True)
     first_collection = ccs.metadata.collections[0]
     ref_uuid = first_collection.consensusReadSetRef.uuid
     self.assertEqual(ref_uuid, "5416f525-d3c7-496b-ba8c-18d7ec1b4499")
コード例 #46
0
 def _generate_chunk_output_file(self, i=None):
     """Return a copy of the aligned CCS BAM as a mock chunk output.

     ``i`` is accepted but not used.
     """
     mock_source = pbtestdata.get_file("ccs-bam-aligned")
     return self._copy_mock_output_file(mock_source)
コード例 #47
0
 def _generate_chunk_output_file(self, i=None):
     """Return a copy of the subreads BAM as a mock chunk output.

     ``i`` is accepted but not used.
     """
     mock_source = pbtestdata.get_file("subreads-bam")
     return self._copy_mock_output_file(mock_source)
コード例 #48
0
 def setUp(self):
     """Store the barcode set and barcoded SubreadSet paths; clear ``ccs``."""
     barcodes_xml = pbtestdata.get_file("barcodeset")
     subreads_xml = pbtestdata.get_file("barcoded-subreadset")
     self.barcodes, self.subreads = barcodes_xml, subreads_xml
     self.ccs = False
コード例 #49
0
ファイル: test_bamsieve.py プロジェクト: Debian/pbcoretools
import shutil
import os.path as op
import os

from pbcore.io import openDataFile, openDataSet, BamReader

import pbtestdata

from pbcoretools import bamSieve

# Local test data lives in the "data" directory next to this file's parent
# directory.
DATA_DIR = op.join(op.dirname(op.dirname(__file__)), "data")
# BAM / SubreadSet XML inputs taken from the local data directory
SUBREADS1 = op.join(DATA_DIR, "tst_1_subreads.bam")
DS1 = op.join(DATA_DIR, "tst_1.subreadset.xml")
SUBREADS2 = op.join(DATA_DIR, "tst_3_subreads.bam")
DS2 = op.join(DATA_DIR, "tst_3.subreadset.xml")
# inputs provided by the pbtestdata package
SUBREADS3 = pbtestdata.get_file("subreads-bam")
SUBREADS4 = pbtestdata.get_file("aligned-bam")
CCS = pbtestdata.get_file("ccs-bam")
BARCODED = pbtestdata.get_file("barcoded-subreads-bam")
BARCODED_DS = pbtestdata.get_file("barcoded-subreadset")

class TestBamSieve(unittest.TestCase):

    def test_whitelist(self):
        ofn = tempfile.NamedTemporaryFile(suffix=".bam").name
        WHITELIST = set([24962, 32901, 30983])

        def _run_with_whitelist(wl):
            rc = bamSieve.filter_reads(
                input_bam=SUBREADS3,
                output_bam=ofn,
コード例 #50
0
 def setUp(self):
     """Store the alignment, summary GFF and reference paths on the test.

     ``selected_reference`` starts out as ``None``.
     """
     lookup = pbtestdata.get_file
     self.aln_path = lookup("aligned-xml")
     self.gff_path = lookup("alignment-summary-gff")
     self.ref_path = lookup("lambda-fasta")
     self.selected_reference = None
コード例 #51
0
 def getAlignmentSet(self):
     """Return the path of the "aligned-bam" test file."""
     aligned_bam = pbtestdata.get_file("aligned-bam")
     return aligned_bam