def test_dataset_split_multi_movie(self):
    """Run ``dataset split`` on a two-movie SubreadSet and check chunk sizes.

    The two test datasets are merged into a temporary SubreadSet XML, which
    is then split through the command line with different option sets.
    """
    ds1 = pbtestdata.get_file("subreads-sequel")
    ds2 = pbtestdata.get_file("subreads-xml")
    tmp_ds = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    with SubreadSet(ds1, ds2) as ds:
        ds.write(tmp_ds)
    base_args = [
        "dataset", "split", "--maxChunks", "4", "--targetSize", "1",
        "--prefix", "tst_multi_ds"
    ]

    def run_and_validate(args, ds_sizes):
        # A fresh output directory per invocation, so the directory listing
        # below only contains the chunks written by this command.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        final_args = base_args + args + ["--outdir", outdir, tmp_ds]
        self._check_cmd(" ".join(final_args))
        dss = [
            openDataSet(op.join(outdir, fn))
            for fn in sorted(os.listdir(outdir))
        ]
        assert [len(chunk) for chunk in dss] == ds_sizes

    run_and_validate(["--zmws"], [52, 22, 42, 21])
    #run_and_validate(["--auto"], [8, 12, 54, 63])
    run_and_validate(["--zmws", "--keepReadGroups"], [8, 12, 54, 63])
def test_dataset_create_set_sample_names(self):
    """Check that ``dataset create`` applies the sample-name options.

    Runs once on a plain BAM input and once on a dataset that already
    carries samples.
    """
    sample_args = "--well-sample-name WELLSAMPLE --bio-sample-name BIOSAMPLE".split()

    def create_with_samples(input_key):
        # Build and run the command line, returning the output XML path.
        out_xml = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        argv = [
            "dataset", "create", "--force", out_xml,
            pbtestdata.get_file(input_key)
        ]
        argv.extend(sample_args)
        self._run_cmd_with_output(" ".join(argv), out_xml)
        return out_xml

    outfile = create_with_samples("subreads-bam")
    with SubreadSet(outfile) as ds:
        assert len(ds.metadata.collections) == 1
        well_sample = ds.metadata.collections[0].wellSample
        assert well_sample.name == "WELLSAMPLE"
        assert ds.metadata.collections[0].wellSample.bioSamples[0].name == "BIOSAMPLE"
        assert len(ds.metadata.collections[0].wellSample.bioSamples) == 1

    # now with existing samples
    outfile = create_with_samples("barcoded-subreadset")
    with SubreadSet(outfile) as ds:
        assert len(ds.metadata.collections) == 1
        assert ds.metadata.collections[0].wellSample.name == "WELLSAMPLE"
        biosamples = set()
        for sample in ds.metadata.collections[0].wellSample.bioSamples:
            biosamples.add(sample.name)
        assert biosamples == {"BIOSAMPLE"}
def test_get_dataset_size(self):
    """Check ``get_dataset_size`` results for several datasets and flag combinations."""
    tiny_xml = pbtestdata.get_file("subreads-sequel")

    size = get_dataset_size(tiny_xml, True, True)
    assert size.numRecords == 20
    assert size.totalLengthMb == 1
    assert size.indexSizeGb == 2
    assert size.numResources == 1
    assert size.numFilters == 0

    size = get_dataset_size(tiny_xml, False, False)
    assert size.numRecords == 20
    assert size.totalLengthMb == 1
    assert size.indexSizeGb == 1

    size = get_dataset_size(self.BIG_DATA, True, True)
    assert size.numRecords == 805580876
    assert size.totalLengthMb == 271330
    assert size.indexSizeGb == 45
    assert size.numResources == 1
    assert size.numFilters == 0

    size = get_dataset_size(self.TINY_REF, False, False)
    assert size.numRecords == 1
    assert size.totalLengthMb == 1

    size = get_dataset_size(self.BIG_REF, False, False)
    assert size.numRecords == 86
    assert size.totalLengthMb == 2993

    ds_aln = pbtestdata.get_file("aligned-ds-2")
    size = get_dataset_size(ds_aln, True, True)
    assert size.numRecords == 21
    assert size.numResources == 2
def setUpClass(cls):
    """Open the aligned test dataset once and cache its readers and interval lists."""
    reference_fasta = pbtestdata.get_file("lambda-fasta")
    cls.xml_path = pbtestdata.get_file("aligned-xml")
    cls.ds_reader = AlignmentSet(
        cls.xml_path,
        strict=True,
        reference=reference_fasta,
    )
    cls.bam_readers = cls.ds_reader.resourceReaders()
    # Interval lists are built from the per-resource readers obtained above.
    interval_lists = summarize_coverage.build_interval_lists(cls.bam_readers)
    cls.interval_lists = interval_lists
def test_split_zmws_around_read_groups(self):
    """Check ZMW splitting with and without breaking up read groups."""
    first_qid = -2081539485
    second_qid = -1197849594
    ds1 = pbtestdata.get_file("subreads-xml")
    ds2 = pbtestdata.get_file("subreads-sequel")
    ds = SubreadSet(ds1, ds2)
    assert len(ds) == 137

    # this is still the default behavior
    chunks = list(ds.split(chunks=2, zmws=True, breakReadGroups=True))
    assert len(chunks[0]) == 72
    assert len(chunks[1]) == 65

    # don't break up movies
    chunks = list(ds.split(chunks=2, zmws=True, breakReadGroups=False))
    assert len(chunks[0]) == 20
    assert len(chunks[1]) == 117
    for chunk, qid in zip(chunks[:2], (first_qid, second_qid)):
        assert np.all(chunk.index.qId == qid)

    chunks = list(
        ds.split(chunks=4, targetSize=1, zmws=True, breakReadGroups=False))
    sizes = [len(c) for c in chunks]
    assert sizes == [8, 12, 54, 63]
    expected_qids = (first_qid, first_qid, second_qid, second_qid)
    for position, qid in enumerate(expected_qids):
        assert np.all(chunks[position].index.qId == qid)

    # control: single-movie dataset
    ds = SubreadSet(ds1)
    chunks1 = list(ds.split(chunks=4, zmws=True, breakReadGroups=False))
    chunks2 = list(ds.split(chunks=4, zmws=True, breakReadGroups=True))
    sizes_kept = [len(x) for x in chunks1]
    sizes_broken = [len(y) for y in chunks2]
    assert sizes_kept == sizes_broken
class TestToolContractHgap(pbcommand.testkit.core.PbTestApp):
    """Test-app configuration for ``pbreports.report.coverage_hgap``."""

    DRIVER_BASE = "python -m pbreports.report.coverage_hgap"
    # Input files are looked up by name in pbtestdata, in this order.
    INPUT_FILES = list(
        map(pbtestdata.get_file, ("lambda-fasta", "alignment-summary-gff")))
    IS_DISTRIBUTED = RESOLVED_IS_DISTRIBUTED = True
def test_get_index_size_bytes(self):
    """Check ``get_index_size_bytes`` on the first .pbi of three test datasets."""
    import pbtestdata
    expected_sizes = (
        ("subreads-sequel", 580),
        ("ccs-barcoded", 68),
        ("aligned-xml", 7504),
    )
    for data_key, n_bytes in expected_sizes:
        dataset = openDataSet(pbtestdata.get_file(data_key))
        pbi_path = dataset.externalResources[0].pbi
        assert get_index_size_bytes(pbi_path) == n_bytes
def test_run_bamsieve_extract_unmapped(self):
    """Blacklist the mapped reads with bamsieve and check the output shares none of them."""
    output_bam = "unmapped.subreads.bam"
    mapped = _make_filtered(pbtestdata.get_file("aligned-xml"))
    subreads = pbtestdata.get_file("subreads-xml")
    args = ["bamsieve", "--subreads", "--blacklist"]
    args += [mapped, subreads, output_bam]
    self._check_call(args)
    assert_no_reads_in_common(self, mapped, output_bam)
def setUpClass(cls):
    """Open the aligned test dataset once and cache its readers and interval lists."""
    cls.xml_path = pbtestdata.get_file("aligned-xml")
    reader_options = {
        "strict": True,
        "reference": pbtestdata.get_file("lambda-fasta"),
    }
    cls.ds_reader = AlignmentSet(cls.xml_path, **reader_options)
    cls.bam_readers = cls.ds_reader.resourceReaders()
    # Interval lists are built from the per-resource readers obtained above.
    cls.interval_lists = summarize_coverage.build_interval_lists(cls.bam_readers)
class TestScatterCCSReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test configuration for ``pbcoretools.tasks.scatter_ccs_reference``."""

    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_ccs_reference"
    # Input files are looked up by name in pbtestdata, in this order.
    INPUT_FILES = list(map(pbtestdata.get_file, ("rsii-ccs", "lambdaNEB")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 8
    CHUNK_KEYS = (
        "$chunk.ccsset_id",
        "$chunk.reference_id",
    )
class TestScatterSubreadReference(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test configuration for ``pbcoretools.tasks.scatter_subread_reference``."""

    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subread_reference"
    # Input files are looked up by name in pbtestdata, in this order.
    INPUT_FILES = list(map(pbtestdata.get_file, ("subreads-xml", "lambdaNEB")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 3
    CHUNK_KEYS = (
        "$chunk.subreadset_id",
        "$chunk.reference_id",
    )
class TestSummarizeCoverage(pbcommand.testkit.PbTestApp):
    """Test-app configuration for the ``summarize_coverage`` report module."""

    # The trailing space in DRIVER_BASE is part of the original command string.
    DRIVER_BASE = "python -m pbreports.report.summarize_coverage.summarize_coverage "
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(pbtestdata.get_file, ("aligned-xml", "lambda-fasta")))
    TASK_OPTIONS = dict()
class TestSummarizeConsensus(pbcommand.testkit.PbTestApp):
    """Test-app configuration for the ``summarizeConsensus`` command."""

    DRIVER_BASE = "summarizeConsensus"
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # Input files are looked up by name in pbtestdata, in this order.
    INPUT_FILES = list(
        map(pbtestdata.get_file, ("alignment-summary-gff", "variants-gff")))
    TASK_OPTIONS = dict()
def test_trust_counts(self):
    """Open two aligned datasets with ``trustCounts=True`` and check the counts.

    Also asserts that no index was loaded and no readers were opened.
    """
    import pbtestdata
    inputs = [
        pbtestdata.get_file("aligned-xml"),
        pbtestdata.get_file("aligned-ds-2"),
    ]
    ds = openDataFile(*inputs, trustCounts=True)
    expected_records = 133
    assert ds.numRecords == expected_records
    assert len(ds) == expected_records
    assert ds.totalLength == 274217
    assert ds._index is None
    assert len(ds._openReaders) == 0
class TestScatterSubreadsBarcoding(pbcommand.testkit.core.PbTestScatterApp):
    """Scatter test configuration for ``pbcoretools.tasks.scatter_subreads_bam2bam``."""

    DRIVER_BASE = "python -m pbcoretools.tasks.scatter_subreads_bam2bam"
    # XXX not actually barcoded data, but it doesn't matter here
    INPUT_FILES = list(map(pbtestdata.get_file, ("subreads-bam", "barcodeset")))
    MAX_NCHUNKS = RESOLVED_MAX_NCHUNKS = 8
    NCHUNKS_EXPECTED = 2
    CHUNK_KEYS = (
        "$chunk.subreadset_id",
        "$chunk.barcodeset_id",
    )
def test_get_dataset_metadata(self):
    """Check metatype and UUID from ``get_dataset_metadata``, and failure on ``None``."""
    import pbtestdata
    md = get_dataset_metadata(pbtestdata.get_file("subreads-xml"))
    assert md.metatype == "PacBio.DataSet.SubreadSet"
    from pbcore.io import SubreadSet
    ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
    # The UUID reported by the metadata helper must match the one pbcore
    # reads from the same file.
    assert md.uuid == ds.uuid
    # The exception object itself is not inspected, so it is not bound.
    with pytest.raises(Exception):
        get_dataset_metadata(None)
class TestPbalignMinorVariants(pbcommand.testkit.PbTestApp):
    """Test-app configuration for ``pbalign.tasks.align_minorvariants``."""

    DRIVER_BASE = "python -m pbalign.tasks.align_minorvariants"
    INPUT_FILES = [
        pbtestdata.get_file("rsii-ccs"),
        pbtestdata.get_file("lambdaNEB")
    ]

    def run_after(self, rtc, output_dir):
        """Check that the first output file opens as a ConsensusAlignmentSet.

        ``rtc`` supplies the output file list through
        ``rtc.task.output_files``; ``output_dir`` is not used here.
        """
        ds_out = openDataSet(rtc.task.output_files[0])
        # assertIsInstance reports both the expected and the actual type on
        # failure; the actual type name is kept as the extra message.
        self.assertIsInstance(ds_out, ConsensusAlignmentSet,
                              type(ds_out).__name__)
def test_gather_alignments_trust_counts(self):
    """Merge two aligned datasets via ``dataset create --trustCounts`` and check the totals."""
    inputs = [
        pbtestdata.get_file("aligned-xml"),
        pbtestdata.get_file("aligned-ds-2"),
    ]
    tmp_out = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    args = ["dataset", "create", "--trustCounts", tmp_out] + inputs
    exit_status = subprocess.check_call(args)
    assert exit_status == 0
    merged = AlignmentSet(tmp_out, trustCounts=True)
    assert merged.numRecords == 133
    assert merged.totalLength == 274217
class TestPbreportMappingStatsHGAP(pbcommand.testkit.PbTestApp):
    """Test-app configuration for ``pbreports.report.mapping_stats_hgap``."""

    DRIVER_BASE = "python -m pbreports.report.mapping_stats_hgap"
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("aligned-internal-subreads", "internal-subreads"),
    ))

    def run_after(self, rtc, output_dir):
        """Load the JSON report from the first output file and check its first attribute."""
        report = load_report_from_json(rtc.task.output_files[0])
        first_attribute = report.attributes[0]
        self.assertEqual(first_attribute.id, Constants.A_PCT_MAPPED)
        self.assertAlmostEqual(first_attribute.value, 0.9137, delta=0.0001)
class TestPbreportTopVariants(pbcommand.testkit.PbTestApp):
    """Test-app configuration for ``pbreports.report.top_variants``."""

    # Imported in the class body, so ``Constants`` is also a class attribute.
    from pbreports.report.top_variants import Constants

    # The trailing space in DRIVER_BASE is part of the original command string.
    DRIVER_BASE = "python -m pbreports.report.top_variants "
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    INPUT_FILES = list(map(pbtestdata.get_file, ("variants-gff", "lambda-fasta")))
    # Each task option is set to the default declared in Constants.
    TASK_OPTIONS = dict([
        (Constants.HOW_MANY_ID, Constants.HOW_MANY_DEFAULT),
        (Constants.BATCH_SORT_SIZE_ID, Constants.BATCH_SORT_SIZE_DEFAULT),
    ])
class TestEstimateLimaMemory(PbIntegrationBase):
    """Tests for ``estimate_lima_memory``, called directly and as a module.

    The ``TINY_*`` inputs come from pbtestdata; ``BIG_BARCODES``,
    ``BIG_DATA`` and ``CCS_DATA`` are absolute paths under ``/pbi``.
    """

    TINY_DATA = pbtestdata.get_file("subreads-sequel")
    TINY_BARCODES = pbtestdata.get_file("barcodeset")
    BIG_BARCODES = "/pbi/dept/secondary/siv/barcodes/Sequel_RSII_384_barcodes_v1/Sequel_RSII_384_barcodes_v1.barcodeset.xml"
    BIG_DATA = "/pbi/dept/secondary/siv/testdata/Spider/all4mers/rSPOC1_20180629_223342/1_A01/mSPOC1_180629_223410.subreadset.xml"
    CCS_DATA = "/pbi/dept/secondary/siv/testdata/SA3-Sequel/bcol/m54119_161211_175055.consensusreadset.xml"

    def test_estimate_lima_memory(self):
        """Check the estimated memory (GB) for three barcode/data combinations."""
        mem_gb = estimate_lima_memory(self.TINY_BARCODES, self.TINY_DATA, True)
        assert mem_gb == 2
        # this is silly of course.  but it's technically possible with the
        # Sequel II system, so we might as well just deal with it
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, self.BIG_DATA, False)
        assert mem_gb == 2752
        # this is a more realistic case - 147K barcode pairs but the BAM file
        # is small enough to fit in the default footprint
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, self.CCS_DATA, False)
        assert mem_gb == 7

    def test_integration_tiny(self):
        """Run the module with ``--symmetric`` and check ``lima_mem_gb.txt``."""
        args = [
            "python3", "-m", "pbcoretools.tasks.memory.estimate_lima_memory",
            self.TINY_BARCODES, self.TINY_DATA, "--symmetric"
        ]
        self._check_call(args)
        with open("lima_mem_gb.txt") as txt_out:
            assert txt_out.read() == "2"

    def test_integration_big(self):
        """Run the module with ``--asymmetric`` and check ``lima_mem_gb.txt``."""
        args = [
            "python3", "-m", "pbcoretools.tasks.memory.estimate_lima_memory",
            self.BIG_BARCODES, self.BIG_DATA, "--asymmetric"
        ]
        self._check_call(args)
        with open("lima_mem_gb.txt") as txt_out:
            assert txt_out.read() == "2752"

    def test_defined_biosamples(self):
        """Check the estimate for a dataset with 384 bio samples set on it."""
        # XXX awful dependency but it makes testing easier
        from pbcoretools.file_utils import set_bio_samples
        ds_tmp = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        # The barcode set is only passed by path below, so it is not opened
        # here.
        with openDataSet(self.BIG_DATA, trustCounts=True) as ds:
            bcs = [("bc1001--bc1{:03d}".format(x), "Sample {}".format(x))
                   for x in range(384)]
            set_bio_samples(ds, bcs)
            ds.write(ds_tmp)
        mem_gb = estimate_lima_memory(self.BIG_BARCODES, ds_tmp, False)
        assert mem_gb == 2
def test_get_dataset_metadata(self):
    """Check metatype and UUID from ``get_dataset_metadata``.

    The test is skipped when pbtestdata or pbcore cannot be imported.
    """
    try:
        import pbtestdata
    except ImportError:
        raise unittest.SkipTest("pbtestdata not available, skipping")

    subreads_xml = pbtestdata.get_file("subreads-xml")
    md = get_dataset_metadata(subreads_xml)
    self.assertEqual(md.metatype, "PacBio.DataSet.SubreadSet")

    try:
        from pbcore.io import SubreadSet
    except ImportError:
        raise unittest.SkipTest("pbcore not available, skipping")

    ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
    self.assertEqual(md.uuid, ds.uuid)
class TestPbalign(pbcommand.testkit.PbTestApp):
    """Test-app configuration for the ``pbalign`` command."""

    # The trailing space in DRIVER_BASE is part of the original command string.
    DRIVER_BASE = "pbalign "
    REQUIRES_PBCORE = True
    INPUT_FILES = [
        pbtestdata.get_file("subreads-xml"),
        pbtestdata.get_file("lambdaNEB")
    ]
    TASK_OPTIONS = {
        "pbalign.task_options.algorithm_options":
        "--holeNumbers 1-1000,30000-30500,60000-60600,100000-100500",
    }

    def run_after(self, rtc, output_dir):
        """Check that the first output file opens as an AlignmentSet.

        ``rtc`` supplies the output file list through
        ``rtc.task.output_files``; ``output_dir`` is not used here.
        """
        ds_out = openDataSet(rtc.task.output_files[0])
        # assertIsInstance reports both the expected and the actual type on
        # failure; the actual type name is kept as the extra message.
        self.assertIsInstance(ds_out, AlignmentSet, type(ds_out).__name__)
class TestToolContract(pbcommand.testkit.PbTestApp):
    """Test-app configuration for ``pbreports.report.variants``."""

    DATA_DIR = op.join(LOCAL_DATA, "variants")
    DRIVER_BASE = "python -m pbreports.report.variants"
    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
    REQUIRES_PBCORE = True
    # Input files are looked up by name in pbtestdata, in this order.
    INPUT_FILES = list(map(
        pbtestdata.get_file,
        ("lambda-fasta", "consensus-summary-gff", "variants-gff"),
    ))
    TASK_OPTIONS = dict([
        ("pbreports.task_options.max_contigs", 25),
        ("pbreports.task_options.dpi", 60),
        ("pbreports.task_options.dumpdata", True),
    ])
def test_make_filter_stats_report_sts_xml(self):
    """
    Test the content of the filter report generated from an sts.xml
    """
    sts_xml = pbtestdata.get_file("stats-xml")
    rpt = make_filter_report(sts_xml, self.get_output_dir())
    d = json.loads(rpt.to_json())
    self._compare_attribute_values(report_d=d,
                                   expected_d={
                                       Constants.A_NBASES: 1672335649,
                                       Constants.A_NREADS: 394658,
                                       Constants.A_READ_N50: 7750,
                                       Constants.A_READ_LENGTH: 4237
                                   })
    self.assertTrue(
        os.path.exists(
            os.path.join(self.get_output_dir(), 'readLenDist0.png')))
    # self.assertTrue(os.path.exists(os.path.join(
    #     self.get_output_dir(),
    #     'readQualDist0.png')))

    # these are from a raw STS file
    self.assertEqual(len(rpt._dataset_uuids), 0,
                     "Incorrect report datasets uuids")
    # Single-argument call form: prints the same text under Python 2 and is
    # valid syntax under Python 3.
    print(pformat(rpt.to_dict()))
    validate_report_complete(self, rpt)
def setup_class(cls):
    """Write a SubreadSet to ``cls.INPUT_FILE`` built from the barcoded dataset's BAM files."""
    source_xml = pbtestdata.get_file("barcoded-subreadset")
    with SubreadSet(source_xml) as ds_in:
        # Collect the BAM path of every external resource of the input.
        bam_files = [resource.bam for resource in ds_in.externalResources]
    with SubreadSet(*bam_files, strict=True) as ds_out:
        ds_out.write(cls.INPUT_FILE)
def test_integration(self):
    """Run ``make_trimmed_dataset`` on a datastore and input dataset, then check the output.

    The output ConsensusReadSet must be non-empty, be named
    "My Data (trimmed)" and carry the tag "ccs".
    """
    def temp_path(suffix):
        # Only the generated name is needed, not the open file object.
        return tempfile.NamedTemporaryFile(suffix=suffix).name

    ccs_barcoded = pbtestdata.get_file("ccs-barcoded")
    datastore = temp_path(".datastore.json")
    lima_out = temp_path(".consensusreadset.xml")
    ccs_in = temp_path(".consensusreadset.xml")

    with ConsensusReadSet(ccs_barcoded) as ccs_tmp:
        ccs_tmp.name = "My Data (filtered)"
        ccs_tmp.tags = "ccs,filtered"
        ccs_tmp.write(ccs_in)
        ccs_tmp.name = "lima out"
        ccs_tmp.write(lima_out)

    lima_file = DataStoreFile(uuid.uuid4(), "lima",
                              FileTypes.DS_CCS.file_type_id, lima_out)
    DataStore([lima_file]).write_json(datastore)

    args = ["python3", "-m", "pbcoretools.tasks.make_trimmed_dataset"]
    args += [datastore, ccs_in]
    self._check_call(args)

    with ConsensusReadSet("trimmed.consensusreadset.xml",
                          trustCounts=True) as ccs_out:
        assert ccs_out.numRecords > 0
        assert ccs_out.name == "My Data (trimmed)"
        assert ccs_out.tags == "ccs"
def test__read_in_indexed_alignmentset(self):
    """Check the rows returned by ``_read_in_indexed_alignmentset`` for the aligned BAM."""
    bam = pbtestdata.get_file("aligned-bam")
    data = _read_in_indexed_alignmentset(bam)
    # Every row carries 254 in its third column.
    third_column_ok = all(row[2] == 254 for row in data)
    self.assertTrue(third_column_ok)
    self.assertEqual(len(data), 112)
    last_row = data[-1]
    self.assertEqual(last_row[0], 605)
    self.assertTrue(0.927 < last_row[1] < 0.928)
def test__read_in_indexed_alignmentset(self):
    """Check the rows returned by ``_read_in_indexed_alignmentset`` for the aligned BAM."""
    bam = pbtestdata.get_file("aligned-bam")
    data = _read_in_indexed_alignmentset(bam)
    # Every row carries 254 in its third column.
    third_column_ok = all(row[2] == 254 for row in data)
    self.assertTrue(third_column_ok)
    self.assertEqual(len(data), 112)
    last_row = data[-1]
    self.assertEqual(last_row[0], 605)
    self.assertTrue(0.927 < last_row[1] < 0.928)
class TestIntegrationMappingStatsReport(unittest.TestCase):
    """Run the mapping-stats command on the aligned BAM and validate its JSON report."""

    ALIGNMENTS = pbtestdata.get_file("aligned-bam")

    def setUp(self):
        """Create an output directory and reserve a path for the JSON report."""
        self.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
        self.aligned_reads_bam = self.ALIGNMENTS
        # delete=False keeps the file on disk after it is closed, so its
        # path stays usable as the report destination.
        with tempfile.NamedTemporaryFile(
                delete=False, suffix="mapping_report.json") as handle:
            self.report_json = handle.name

    def test_basic(self):
        """The command exits with 0 and writes a complete report with one table."""
        cmd = _to_cmd(self.ALIGNMENTS, self.report_json)
        exit_code = run_backticks(cmd)
        self.assertEqual(exit_code, 0)

        with open(self.report_json, 'r') as report_file:
            report_dict = json.load(report_file)

        log.info("JsonReport: ")
        log.info(pprint.pformat(report_dict, indent=4))

        report = dict_to_report(report_dict)
        self.assertIsNotNone(report)
        self.assertEqual(len(report.tables), 1)
        log.info(str(report.tables[0]))
        validate_report_metadata(self, report, spec)
        validate_report_complete(self, report)
def test_update_barcoded_sample_metadata(self):
    """Exercise ``update_barcoded_sample_metadata`` in three configurations.

    Covers the default call, ``use_barcode_uuids=False``, and a datastore
    split from a dataset with no collection metadata.
    """
    datastore_tmp = tempfile.NamedTemporaryFile(
        suffix=".datastore.json").name
    barcodes = pbtestdata.get_file("barcodeset")
    split_barcoded_dataset(self.SUBREADS).write_json(datastore_tmp)
    base_dir = tempfile.mkdtemp()
    datastore = update_barcoded_sample_metadata(
        base_dir, datastore_tmp, self.SUBREADS, barcodes)
    validate_barcoded_datastore_files(self, self.SUBREADS, datastore)

    # now with use_barcode_uuids=False
    datastore = update_barcoded_sample_metadata(
        base_dir, datastore_tmp, self.SUBREADS, barcodes,
        use_barcode_uuids=False)
    validate_barcoded_datastore_files(
        self, self.SUBREADS, datastore, use_barcode_uuids=False)

    # test that it doesn't break with no collection metadata
    stripped = SubreadSet(self.SUBREADS)
    stripped.metadata.collections = None
    ss_tmp = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    stripped.write(ss_tmp)
    split_barcoded_dataset(ss_tmp).write_json(datastore_tmp)
    base_dir = tempfile.mkdtemp()
    datastore = update_barcoded_sample_metadata(
        base_dir, datastore_tmp, self.SUBREADS, barcodes)
    validate_barcoded_datastore_files(
        self, self.SUBREADS, datastore,
        have_collection_metadata=False,
        number_of_expected_collections=0)
def test_integration_simple(self):
    """Run ``consolidate_reads_bam`` on the CCS dataset and check that ``reads.bam`` exists."""
    ds_in = pbtestdata.get_file("ccs-sequel")
    module_name = "pbcoretools.tasks.consolidate_reads_bam"
    self._check_call(["python3", "-m", module_name, ds_in])
    assert op.isfile("reads.bam")
def test_make_filter_stats_report_sts_xml(self):
    """
    Test the content of the filter report generated from an sts.xml
    """
    sts_xml = pbtestdata.get_file("stats-xml")
    rpt = make_filter_report(sts_xml, self.get_output_dir())
    d = json.loads(rpt.to_json())
    self._compare_attribute_values(
        report_d=d,
        expected_d={
            Constants.A_NBASES: 1672335649,
            Constants.A_NREADS: 394658,
            Constants.A_READ_N50: 7750,
            Constants.A_READ_LENGTH: 4237,
        },
    )
    self.assertTrue(os.path.exists(os.path.join(self.get_output_dir(), "readLenDist0.png")))
    # self.assertTrue(os.path.exists(os.path.join(
    #     self.get_output_dir(),
    #     'readQualDist0.png')))

    # these are from a raw STS file
    self.assertEqual(len(rpt._dataset_uuids), 0, "Incorrect report datasets uuids")
    # Single-argument call form: prints the same text under Python 2 and is
    # valid syntax under Python 3.
    print(pformat(rpt.to_dict()))
    validate_report_complete(self, rpt)
def test_provenance_record_ordering(self):
    """Check the order of metadata child tags after adding a parent dataset and re-reading."""
    import pbtestdata
    expected_tags = [
        'TotalLength', 'NumRecords', 'Provenance', 'Collections',
        'SummaryStats'
    ]
    source = pbtestdata.get_file("subreads-sequel")
    ds = SubreadSet(source, strict=True)
    ds.metadata.addParentDataSet(
        uuid.uuid4(),
        ds.datasetType,
        createdBy="AnalysisJob",
        timeStampedName="")
    tmp_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    ds.write(tmp_out)
    # Re-read the written file so the order is the one found on disk.
    reloaded = SubreadSet(tmp_out, strict=True)
    tags = []
    for child in reloaded.metadata.record['children']:
        tags.append(child['tag'])
    self.assertEqual(tags, expected_tags)
def setUp(self):
    """Split the subreads BAM into two ZMW chunks and write one as the input file.

    The chunk at ``CHUNK_INDEX`` is written to ``self.INPUT_FILES[0]`` and
    part of its first ZMW range is stored in ``self.zmw_range``.
    """
    BAM_IN = pbtestdata.get_file("subreads-bam")
    subreads = SubreadSet(BAM_IN, strict=True)
    chunks = subreads.split(zmws=True, chunks=2, targetSize=2)
    assert len(chunks) == 2
    selected = chunks[CHUNK_INDEX]
    first_range = selected.zmwRanges[0]
    self.zmw_range = first_range[1:3]
    range_message = "zmwRanges[CHUNK_INDEX] = {r}".format(
        r=str(selected.zmwRanges))
    logging.info(range_message)
    destination = self.INPUT_FILES[0]
    logging.info("SubreadSet = {f}".format(f=destination))
    selected.write(self.INPUT_FILES[0])
def test_ccs_barcodes_table(self):
    """Check the second table of the report built from the barcoded CCS dataset."""
    CCS_DS = pbtestdata.get_file("ccs-barcoded")
    report = to_report(ConsensusReadSet(CCS_DS), tempfile.mkdtemp())
    barcode_table = report.tables[1]
    leading_values = [column.values for column in barcode_table.columns[0:4]]
    self.assertEqual(
        leading_values,
        [["lbc1", "lbc3"], [1, 1], [1958, 1954], [1958, 1954]])
    # The fifth column holds floats, compared to four decimal places.
    self.assertAlmostEqual(
        report.tables[1].columns[4].values[0], 0.9724, places=4)
    self.assertAlmostEqual(
        report.tables[1].columns[4].values[1], 0.9926, places=4)
def test_exit_code_0(self):
    """
    Like a cram test. Assert exits with 0, even though region size is 0 See
    bug 25079
    """
    from pbcore.util.Process import backticks
    import tempfile
    ref = pbtestdata.get_file("lambda-fasta")
    tiny_reads = pbtestdata.get_file("aligned-xml")
    out = os.path.join(tempfile.mkdtemp(suffix="summ_cov"), 'gff')
    cmd = 'summarize_coverage --region_size=0 --num_regions=500 {a} {r} {g}'.format(
        a=tiny_reads, r=ref, g=out)

    # backticks returns (output, exit code, message).
    o, c, m = backticks(cmd)
    log.info(cmd)
    # Compare by value: `is not 0` tests object identity, which is not a
    # reliable integer comparison.
    if c != 0:
        log.error(m)
        log.error(o)
        print(m)
    self.assertEqual(0, c)
    self.assertTrue(os.path.exists(out))
def test_adapter_exit_code_0(self):
    """Run ``adapter_xml`` on the Sequel subreads dataset and assert exit code 0."""
    subreads_xml = pbtestdata.get_file("subreads-sequel")
    cmd = "adapter_xml {c} {r}".format(r="foo.json", c=subreads_xml)
    # backticks returns (output, exit code, message).
    o, c, m = backticks(cmd)
    # Single-argument print calls behave the same under Python 2 and 3.
    print("COMMAND: {c}".format(c=cmd))
    log.info(cmd)
    print("o: {o}".format(o=o))
    print("c: {c}".format(c=c))
    print("m: {m}".format(m=m))
    # Compare by value: `is not 0` tests object identity, which is not a
    # reliable integer comparison.
    if c != 0:
        log.error(m)
        log.error(o)
    self.assertEqual(0, c)
def test_loading_exit_code_0(self):
    """Run ``loading_xml`` on the stats XML test file and assert exit code 0."""
    sts_xml = pbtestdata.get_file("stats-xml")
    cmd = "loading_xml {c} {r}".format(r="foo.json", c=sts_xml)
    # backticks returns (output, exit code, message).
    o, c, m = backticks(cmd)
    # Single-argument print calls behave the same under Python 2 and 3.
    print("COMMAND: {c}".format(c=cmd))
    log.info(cmd)
    print("o: {o}".format(o=o))
    print("c: {c}".format(c=c))
    print("m: {m}".format(m=m))
    # Compare by value: `is not 0` tests object identity, which is not a
    # reliable integer comparison.
    if c != 0:
        log.error(m)
        log.error(o)
    self.assertEqual(0, c)
def test_merge_biosamples(self):
    """Check how bio samples and their barcodes merge when datasets are combined."""
    import pbtestdata

    def sample_names(dataset):
        # Names of the merged bio samples, in dataset order.
        return [bs.name for bs in dataset.metadata.bioSamples]

    ds1 = pbtestdata.get_file("subreads-biosample-1")
    ds2 = pbtestdata.get_file("subreads-biosample-2")

    # Case 1: two biosamples
    merged = SubreadSet(ds1, ds2)
    self.assertEqual(sample_names(merged), ["Alice", "Bob"])

    # Case 2: same biosample in both files
    merged = SubreadSet(ds1, ds1)
    self.assertEqual(sample_names(merged), ["Alice"])
    self.assertEqual(len(merged.metadata.bioSamples[0].DNABarcodes), 1)

    # Case 3: same biosample, different barcodes
    renamed = SubreadSet(ds1)
    renamed.metadata.bioSamples[0].DNABarcodes[0].name = "F7--R7"
    tmpFile = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    renamed.write(tmpFile)
    merged = SubreadSet(ds1, tmpFile)
    self.assertEqual(sample_names(merged), ["Alice"])
    barcode_names = []
    for barcode in merged.metadata.bioSamples[0].DNABarcodes:
        barcode_names.append(barcode.name)
    self.assertEqual(barcode_names, ["F1--R1", "F7--R7"])
def setUpClass(cls):
    """Build the CCS mapping-stats report once, write it as JSON and log its content."""
    cls.output_dir = tempfile.mkdtemp(suffix="_mapping_stats")
    cls.aligned_reads_xml = pbtestdata.get_file("rsii-ccs-aligned")
    # delete=False keeps the file on disk after it is closed, so its path
    # stays usable as the report destination.
    with tempfile.NamedTemporaryFile(
            delete=False, suffix="mapping_report.json") as handle:
        cls.report_json = handle.name
    cls.report = mapping_stats_ccs.to_report(
        cls.aligned_reads_xml, cls.output_dir)
    cls.report.write_json(cls.report_json)

    if not isinstance(cls.report, Report):
        return
    log.info(pprint.pformat(cls.report.to_dict()))
    for table in cls.report.tables:
        log.info(str(table))
def test_filter_exit_code_0(self):
    """Run ``filter_stats_xml`` on the Sequel subreads dataset and assert exit code 0."""
    # The unused temp directory and cwd lookup were removed: neither value
    # was read, and the directory was never cleaned up.
    sts_xml = pbtestdata.get_file("subreads-sequel")
    cmd = "filter_stats_xml {c} {r}".format(r="foo.json", c=sts_xml)
    # backticks returns (output, exit code, message).
    o, c, m = backticks(cmd)
    # Single-argument print calls behave the same under Python 2 and 3.
    print("COMMAND: {c}".format(c=cmd))
    log.info(cmd)
    print("o: {o}".format(o=o))
    print("c: {c}".format(c=c))
    print("m: {m}".format(m=m))
    # Compare by value: `is not 0` tests object identity, which is not a
    # reliable integer comparison.
    if c != 0:
        log.error(m)
        log.error(o)
    self.assertEqual(0, c)
def test_make_filter_stats_report_dataset(self):
    """
    Test the content of the filter report generated from a dataset
    """
    expected_values = {
        Constants.A_NBASES: 1672335649,
        Constants.A_NREADS: 394658,
        Constants.A_READ_N50: 7750,
        Constants.A_READ_LENGTH: 4237,
    }
    sts_xml = pbtestdata.get_file("subreads-sequel")
    rpt = make_filter_report(sts_xml, self.get_output_dir())
    report_values = json.loads(rpt.to_json())
    self._compare_attribute_values(report_d=report_values,
                                   expected_d=expected_values)
    # The read-length plot must have been written to the output directory.
    plot_path = os.path.join(self.get_output_dir(), "readLenDist0.png")
    self.assertTrue(os.path.exists(plot_path))
def _get_bax2bam_inputs(): """Little hackery to get the setup class Inputs and to avoid calls to setupclass if skiptest is used Nat: we want to test that this behaves properly when multiple movies are supplied as input, so we make an HdfSubreadSet on the fly from various bax files in testdata """ if HAVE_DATA_AND_BAX2BAM: hdf_subread_xml = tempfile.NamedTemporaryFile(suffix=".hdfsubreadset.xml").name bax_files = (SIV_DATA_DIR + "/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5", pbtestdata.get_file("rsii-bax-h5")) ds = HdfSubreadSet(*bax_files) ds.name = "lambda_rsii" assert len(set([f.movieName for f in ds.resourceReaders()])) == 2 ds.write(hdf_subread_xml) return [hdf_subread_xml] else: # Assume the test data isn't found and the test won't be run return ["/path/to/this-test-should-be-skipped.txt"]
def test_consensus_read_set_ref(self):
    """Check the UUID of the ConsensusReadSetRef in the first collection."""
    import pbtestdata
    ccs_path = pbtestdata.get_file("ccs-sequel")
    ds = ConsensusReadSet(ccs_path, strict=True)
    first_collection = ds.metadata.collections[0]
    ref_uuid = first_collection.consensusReadSetRef.uuid
    self.assertEqual(ref_uuid, "5416f525-d3c7-496b-ba8c-18d7ec1b4499")
def _generate_chunk_output_file(self, i=None):
    """Return a copy of the aligned CCS BAM as a mock chunk output; ``i`` is unused."""
    mock_source = pbtestdata.get_file("ccs-bam-aligned")
    return self._copy_mock_output_file(mock_source)
def _generate_chunk_output_file(self, i=None):
    """Return a copy of the subreads BAM as a mock chunk output; ``i`` is unused."""
    mock_source = pbtestdata.get_file("subreads-bam")
    return self._copy_mock_output_file(mock_source)
def setUp(self):
    """Point the test at the barcode set and barcoded SubreadSet, with ``ccs`` off."""
    fetch = pbtestdata.get_file
    self.barcodes = fetch("barcodeset")
    self.subreads = fetch("barcoded-subreadset")
    self.ccs = False
import shutil import os.path as op import os from pbcore.io import openDataFile, openDataSet, BamReader import pbtestdata from pbcoretools import bamSieve DATA_DIR = op.join(op.dirname(op.dirname(__file__)), "data") SUBREADS1 = op.join(DATA_DIR, "tst_1_subreads.bam") DS1 = op.join(DATA_DIR, "tst_1.subreadset.xml") SUBREADS2 = op.join(DATA_DIR, "tst_3_subreads.bam") DS2 = op.join(DATA_DIR, "tst_3.subreadset.xml") SUBREADS3 = pbtestdata.get_file("subreads-bam") SUBREADS4 = pbtestdata.get_file("aligned-bam") CCS = pbtestdata.get_file("ccs-bam") BARCODED = pbtestdata.get_file("barcoded-subreads-bam") BARCODED_DS = pbtestdata.get_file("barcoded-subreadset") class TestBamSieve(unittest.TestCase): def test_whitelist(self): ofn = tempfile.NamedTemporaryFile(suffix=".bam").name WHITELIST = set([24962, 32901, 30983]) def _run_with_whitelist(wl): rc = bamSieve.filter_reads( input_bam=SUBREADS3, output_bam=ofn,
def setUp(self):
    """Resolve the alignment, summary GFF and reference paths; no reference is selected."""
    fetch = pbtestdata.get_file
    self.aln_path = fetch("aligned-xml")
    self.gff_path = fetch("alignment-summary-gff")
    self.ref_path = fetch("lambda-fasta")
    self.selected_reference = None
def getAlignmentSet(self):
    """Return the path pbtestdata gives for the "aligned-bam" entry."""
    alignment_path = pbtestdata.get_file("aligned-bam")
    return alignment_path