def test_alignment_reference(self):
    """Aligned records can resolve their reference sequence whether the
    reference is supplied as a ReferenceSet object, as a FASTA path, or
    added after construction via addReference()."""
    rs1 = ReferenceSet(data.getXml(9))
    fasta_res = rs1.externalResources[0]
    # Strip the file:// scheme to get a plain filesystem path.
    fasta_file = urlparse(fasta_res.resourceId).path
    # Case 1: reference given as a ReferenceSet object.
    ds1 = AlignmentSet(data.getXml(8), referenceFastaFname=rs1)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    # Case 2: reference given as a FASTA filename.
    ds1 = AlignmentSet(data.getXml(8), referenceFastaFname=fasta_file)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    # Case 3: reference added after construction.
    ds1 = AlignmentSet(data.getXml(8))
    ds1.addReference(fasta_file)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
def _make_alignmentset(file_name=None):
    """Write a small AlignmentSet XML built from the example BAM.

    If *file_name* is None a temporary path is generated.  Returns the
    path that was written.
    """
    source_bam = pbcore.data.getBamAndCmpH5()[0]
    dataset = AlignmentSet(source_bam)
    if file_name is None:
        file_name = tempfile.NamedTemporaryFile(
            suffix=".alignmentset.xml").name
    dataset.write(file_name)
    return file_name
def test_loadMetadata(self):
    """loadMetadata() populates collection metadata from a run-metadata
    XML, round-trips through write()/validateFile(), and raises
    InvalidDataSetIOError when fed a non-metadata file (sts.xml)."""
    aln = AlignmentSet(data.getXml(no=8))
    self.assertFalse(aln.metadata.collections)
    aln.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                     'SA3-Sequel/lambda/roche_SAT/'
                     'm54013_151205_032353.run.metadata.xml')
    self.assertTrue(aln.metadata.collections)
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    # Keep a copy so we can check loadMetadata restores the original.
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    self.assertFalse(sset.metadata.collections)
    sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                      'SA3-Sequel/lambda/roche_SAT/'
                      'm54013_151205_032353.run.metadata.xml')
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    sset.write(fn)
    validateFile(fn)
    validateFile(sset_fn)
    self.assertEqual(sset.metadata, orig_metadata)
    # load the wrong thing...
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    self.assertFalse(sset.metadata.collections)
    with self.assertRaises(InvalidDataSetIOError):
        sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                          'SA3-Sequel/lambda/roche_SAT/'
                          'm54013_151205_032353.sts.xml')
def test_len_h5(self):
    """len()/_length/totalLength/numRecords agree for an HdfSubreadSet
    and a cmp.h5-backed AlignmentSet, and updateCounts() restores the
    counts after they are clobbered."""
    # HdfSubreadSet
    # len means something else in bax/bas land. These numbers may actually
    # be correct...
    sset = HdfSubreadSet(data.getXml(17), strict=True)
    self.assertEqual(len(sset), 9)
    self.assertEqual(sset._length, (9, 128093))
    self.assertEqual(sset.totalLength, 128093)
    self.assertEqual(sset.numRecords, 9)
    # Clobber the cached counts, then verify updateCounts() repairs them.
    sset.totalLength = -1
    sset.numRecords = -1
    self.assertEqual(sset.totalLength, -1)
    self.assertEqual(sset.numRecords, -1)
    sset.updateCounts()
    self.assertEqual(sset.totalLength, 128093)
    self.assertEqual(sset.numRecords, 9)

    # AlignmentSet with cmp.h5
    aln = AlignmentSet(upstreamData.getBamAndCmpH5()[1], strict=True)
    self.assertEqual(len(aln), 112)
    self.assertEqual(aln._length, (112, 59970))
    self.assertEqual(aln.totalLength, 59970)
    self.assertEqual(aln.numRecords, 112)
    aln.totalLength = -1
    aln.numRecords = -1
    self.assertEqual(aln.totalLength, -1)
    self.assertEqual(aln.numRecords, -1)
    aln.updateCounts()
    self.assertEqual(aln.totalLength, 59970)
    self.assertEqual(aln.numRecords, 112)
def test_refWindows(self):
    """split(contigs=True) produces per-contig filters, and refWindows
    reflects the tStart/tEnd filter requirements."""
    ds = AlignmentSet(data.getBam())
    dss = ds.split(chunks=2, contigs=True)
    self.assertEqual(len(dss), 2)
    log.debug(dss[0].filters)
    log.debug(dss[1].filters)
    # One of the two chunks must carry the E.faecalis.2 contig filter.
    self.assertTrue(
        '( rname = E.faecalis.2 ' in str(dss[0].filters) or
        '( rname = E.faecalis.2 ' in str(dss[1].filters))
    ds = AlignmentSet(data.getBam())
    ds.filters.addRequirement(rname=[('=', 'E.faecalis.2'),
                                     ('=', 'E.faecalis.2')],
                              tStart=[('<', '99'), ('<', '299')],
                              tEnd=[('>', '0'), ('>', '100')])
    self.assertEqual(str(ds.filters),
                     '( rname = E.faecalis.2 AND tstart '
                     '< 99 AND tend > 0 ) OR ( rname = '
                     'E.faecalis.2 AND tstart < 299 AND tend > 100 )')
    # Windows are derived from the (tStart, tEnd) bounds per requirement.
    self.assertEqual(ds.refWindows, [('E.faecalis.2', 0, 99),
                                     ('E.faecalis.2', 100, 299)])
def test_newUuid_random_cli(self):
    """'dataset newuuid --random' assigns fresh, distinct UUIDs to two
    identical copies of the same dataset."""
    fn_orig = data.getXml(7)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    fn = os.path.join(outdir, 'fn.alignmentset.xml')
    fn2 = os.path.join(outdir, 'fn2.alignmentset.xml')
    with AlignmentSet(fn_orig) as aln:
        aln.copyTo(fn)
    shutil.copy(fn, fn2)
    # Identical copies start with identical UUIDs.
    pre_uuid = AlignmentSet(fn).uuid
    pre_uuid2 = AlignmentSet(fn2).uuid
    assert pre_uuid == pre_uuid2
    cmd = "dataset newuuid --random {d}".format(d=fn)
    self._run_cmd_with_output(cmd, fn)
    cmd = "dataset newuuid --random {d}".format(d=fn2)
    self._run_cmd_with_output(cmd, fn2)
    post_uuid = AlignmentSet(fn).uuid
    post_uuid2 = AlignmentSet(fn2).uuid
    assert pre_uuid != post_uuid
    assert pre_uuid2 != post_uuid2
    # RANDOM, THEREFORE THESE ARE NOT EQUAL:
    assert post_uuid != post_uuid2
def mainCmpH5(options): alnReader = AlignmentSet(options.inputCmpH5, referenceFastaFname=options.referenceFilename) if options.fofn is not None: alnReader.attach(options.fofn) if options.referenceFilename: referenceTable = loadReferences(options.referenceFilename, alnReader) else: referenceTable = None for refWindow in options.referenceWindows: refId = refWindow.refId refName = alnReader.referenceInfo(refWindow.refId).FullName refLength = alnReader.referenceInfo(refWindow.refId).Length refWindow = refWindow._replace(refId=refId) refWindow = makeDisplayWindow(refLength, options.width, refWindow) if options.rowNumbers != None: alns = alnReader[options.rowNumbers] else: alns = readsInWindow(alnReader, refWindow, options.depth, minMapQV=options.minMapQV, strategy=options.sorting) print windowToGffString(Window(refName, refWindow.start, refWindow.end)) if options.oneAtATime: formatIndividualAlignments(alnReader, refWindow, alns) else: formatWindow(alnReader, refWindow, alns, referenceTable, options.aligned, options.color, options.realign, options.consensus) print
def test_loadMetadata(self):
    """pytest variant: loadMetadata() populates collection metadata from
    a run-metadata XML, round-trips through write()/validateFile(), and
    raises InvalidDataSetIOError for a non-metadata file (sts.xml)."""
    aln = AlignmentSet(data.getXml(7))
    assert not aln.metadata.collections
    aln.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                     'SA3-Sequel/lambda/roche_SAT/'
                     'm54013_151205_032353.run.metadata.xml')
    assert aln.metadata.collections
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    # Keep a copy so we can check loadMetadata restores the original.
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    assert not sset.metadata.collections
    sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                      'SA3-Sequel/lambda/roche_SAT/'
                      'm54013_151205_032353.run.metadata.xml')
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    sset.write(fn)
    validateFile(fn)
    validateFile(sset_fn)
    assert sset.metadata == orig_metadata
    # load the wrong thing...
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    assert not sset.metadata.collections
    with pytest.raises(InvalidDataSetIOError):
        sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                          'SA3-Sequel/lambda/roche_SAT/'
                          'm54013_151205_032353.sts.xml')
def test_subset_filter(self):
    """A modulo filter (zm % 8 == 0) matches the manual numpy count and
    serializes with Hash/Modulo attributes in the written XML."""
    ds2 = AlignmentSet(data.getXml(7))
    assert len(ds2) == 92
    modvalue = 8
    # manually:
    hns = ds2.index.holeNumber
    assert np.count_nonzero(hns % modvalue == 0) == 26
    # dset filters:
    # The 3-tuple form ('=', '0', modvalue) expresses 'zm % modvalue == 0'.
    ds2.filters.addRequirement(zm=[('=', '0', modvalue)])
    assert len(ds2) == 26
    # written:
    filtstr = '( Uint32Cast(zm) % 8 = 0 )'
    assert str(ds2.filters) == filtstr
    filtxmlstr = ('<pbbase:Property Hash="Uint32Cast" Modulo="8" '
                  'Name="zm" Operator="=" Value="0"/>')
    fn = tempfile.NamedTemporaryFile(suffix="alignmentset.xml").name
    ds2.write(fn)
    with open(fn, 'r') as ifh:
        found = False
        for line in ifh:
            if filtxmlstr in line:
                found = True
    assert found
def test_newUuid_random_cli(self):
    """unittest variant: 'dataset newuuid --random' assigns fresh,
    distinct UUIDs to two identical copies of the same dataset."""
    fn_orig = data.getXml(8)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    fn = os.path.join(outdir, 'fn.alignmentset.xml')
    fn2 = os.path.join(outdir, 'fn2.alignmentset.xml')
    with AlignmentSet(fn_orig) as aln:
        aln.copyTo(fn)
    shutil.copy(fn, fn2)
    # Identical copies start with identical UUIDs.
    pre_uuid = AlignmentSet(fn).uuid
    pre_uuid2 = AlignmentSet(fn2).uuid
    self.assertEqual(pre_uuid, pre_uuid2)
    cmd = "dataset newuuid --random {d}".format(d=fn)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
    self.assertTrue(os.path.exists(fn))
    cmd = "dataset newuuid --random {d}".format(d=fn2)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
    self.assertTrue(os.path.exists(fn2))
    post_uuid = AlignmentSet(fn).uuid
    post_uuid2 = AlignmentSet(fn2).uuid
    self.assertNotEqual(pre_uuid, post_uuid)
    self.assertNotEqual(pre_uuid2, post_uuid2)
    # RANDOM, THEREFORE THESE ARE NOT EQUAL:
    self.assertNotEqual(post_uuid, post_uuid2)
def _readAlignmentInput(self):
    """
    Read the AlignmentSet input file and store it as self._inAlnFile.
    """
    # NOTE(review): reads a module-level/global 'options', not an
    # attribute of self -- confirm 'options' is in scope at call time.
    fname = options.inputFilename
    self._inAlnFile = AlignmentSet(fname)
def test_loadmetadata_from_dataset_create_cli(self):
    """'dataset create --metadata' merges collection metadata from a
    subreadset XML into the newly created dataset."""
    fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    fn2 = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    log.debug(fn)
    # Start from a dataset with its collection metadata stripped.
    aln = AlignmentSet(data.getXml(8))
    aln.metadata.collections = None
    aln.copyTo(fn)
    aln.close()
    del aln
    self.assertTrue(os.path.exists(fn))
    aln = AlignmentSet(fn)
    self.assertFalse(aln.metadata.collections)
    cmd = "dataset create --metadata {m} {o} {i}".format(
        o=fn2, i=fn,
        m=("/pbi/dept/secondary/siv/testdata/"
           "SA3-Sequel/lambda/roche_SAT/"
           "m54013_151205_032353.subreadset.xml"))
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0, m)
    # The created dataset now carries the donated collection metadata.
    aln = AlignmentSet(fn2)
    self.assertTrue(aln.metadata.collections)
def test_len(self):
    """len(), _length, totalLength and numRecords agree for an
    AlignmentSet (with and without filters), a SubreadSet and a
    ReferenceSet, and survive an updateCounts() round trip."""
    # AlignmentSet
    aln = AlignmentSet(data.getXml(8), strict=True)
    self.assertEqual(len(aln), 92)
    self.assertEqual(aln._length, (92, 123588))
    self.assertEqual(aln.totalLength, 123588)
    self.assertEqual(aln.numRecords, 92)
    # Clobber the counts and verify updateCounts() repairs them.
    aln.totalLength = -1
    aln.numRecords = -1
    self.assertEqual(aln.totalLength, -1)
    self.assertEqual(aln.numRecords, -1)
    aln.updateCounts()
    self.assertEqual(aln.totalLength, 123588)
    self.assertEqual(aln.numRecords, 92)
    # Iteration agrees with the cached counts.
    self.assertEqual(sum(1 for _ in aln), 92)
    self.assertEqual(sum(len(rec) for rec in aln), 123588)

    # AlignmentSet with filters
    aln = AlignmentSet(data.getXml(15), strict=True)
    self.assertEqual(len(aln), 40)
    self.assertEqual(aln._length, (40, 52023))
    self.assertEqual(aln.totalLength, 52023)
    self.assertEqual(aln.numRecords, 40)
    aln.totalLength = -1
    aln.numRecords = -1
    self.assertEqual(aln.totalLength, -1)
    self.assertEqual(aln.numRecords, -1)
    aln.updateCounts()
    self.assertEqual(aln.totalLength, 52023)
    self.assertEqual(aln.numRecords, 40)

    # SubreadSet
    sset = SubreadSet(data.getXml(10), strict=True)
    self.assertEqual(len(sset), 92)
    self.assertEqual(sset._length, (92, 124093))
    self.assertEqual(sset.totalLength, 124093)
    self.assertEqual(sset.numRecords, 92)
    sset.totalLength = -1
    sset.numRecords = -1
    self.assertEqual(sset.totalLength, -1)
    self.assertEqual(sset.numRecords, -1)
    sset.updateCounts()
    self.assertEqual(sset.totalLength, 124093)
    self.assertEqual(sset.numRecords, 92)
    self.assertEqual(sum(1 for _ in sset), 92)
    self.assertEqual(sum(len(rec) for rec in sset), 124093)

    # ReferenceSet
    sset = ReferenceSet(data.getXml(9), strict=True)
    self.assertEqual(len(sset), 59)
    self.assertEqual(sset.totalLength, 85774)
    self.assertEqual(sset.numRecords, 59)
    sset.totalLength = -1
    sset.numRecords = -1
    self.assertEqual(sset.totalLength, -1)
    self.assertEqual(sset.numRecords, -1)
    sset.updateCounts()
    self.assertEqual(sset.totalLength, 85774)
    self.assertEqual(sset.numRecords, 59)
def test_readGroupTable(self):
    """Each of the three underlying BAMs contributes one read group,
    and the merged AlignmentSet exposes all three."""
    merged = AlignmentSet(data.getBam(0), data.getBam(1), data.getBam(2))
    bam_readers = merged.resourceReaders()
    for idx in (0, 1, 2):
        self.assertEqual(len(bam_readers[idx].readGroupTable), 1)
    self.assertEqual(len(merged.readGroupTable), 3)
def test_filter(self):
    """An rname filter restricts record iteration; disabling and then
    re-enabling the filters toggles between full and filtered views."""
    dset = AlignmentSet(data.getXml(8))
    dset.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    self.assertEqual(sum(1 for _ in dset.records), 20)
    dset.disableFilters()
    self.assertEqual(sum(1 for _ in dset.records), 92)
    dset.enableFilters()
    self.assertEqual(sum(1 for _ in dset.records), 20)
def test_absolutize_cli(self):
    """'dataset absolutize' rewrites relative resource paths in place."""
    xml_path = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    dset = AlignmentSet(data.getXml(7))
    dset.copyTo(xml_path, relative=True)
    assert _is_relative(xml_path)
    self._run_cmd_with_output(
        "dataset absolutize {d}".format(d=xml_path), xml_path)
    assert not _is_relative(xml_path)
def test_referenceInfo(self):
    """referenceInfoTable has one row per reference contig; a single
    row's repr matches the expected tuple (note the python-2 'L' long
    suffixes in the expected string)."""
    aln = AlignmentSet(data.getBam(0))
    readers = aln.resourceReaders()
    self.assertEqual(len(readers[0].referenceInfoTable), 59)
    self.assertEqual(
        str(readers[0].referenceInfo('E.faecalis.1')),
        "(27, 27, 'E.faecalis.1', 'E.faecalis.1', 1482, "
        "'a1a59c267ac1341e5a12bce7a7d37bcb', 0L, 0L)")
def test_alignment_reference(self):
    """Reference resolution works whether the reference is given as a
    ReferenceSet object, a FASTA path, added via addReference(), or
    supplied alongside a FOFN of datasets; in every case the external
    resource and the resource readers point at the FASTA file.

    FIX: dropped the unused local 'rfn = data.getXml(9)'.
    """
    rs1 = ReferenceSet(data.getXml(9))
    fasta_res = rs1.externalResources[0]
    # Strip the file:// scheme to get a plain filesystem path.
    fasta_file = urlparse(fasta_res.resourceId).path

    # Case 1: reference supplied as a ReferenceSet object.
    ds1 = AlignmentSet(data.getXml(8), referenceFastaFname=rs1)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    self.assertEqual(ds1.externalResources[0].reference, fasta_file)
    self.assertEqual(ds1.resourceReaders()[0].referenceFasta.filename,
                     fasta_file)

    # Case 2: reference supplied as a plain FASTA path.
    ds1 = AlignmentSet(data.getXml(8), referenceFastaFname=fasta_file)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    self.assertEqual(ds1.externalResources[0].reference, fasta_file)
    self.assertEqual(ds1.resourceReaders()[0].referenceFasta.filename,
                     fasta_file)

    # Case 3: reference added after construction.
    ds1 = AlignmentSet(data.getXml(8))
    ds1.addReference(fasta_file)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    self.assertEqual(ds1.externalResources[0].reference, fasta_file)
    self.assertEqual(ds1.resourceReaders()[0].referenceFasta.filename,
                     fasta_file)

    # Case 4: datasets listed in a FOFN, reference as a FASTA path.
    fofn_out = tempfile.NamedTemporaryFile(suffix=".fofn").name
    log.debug(fofn_out)
    with open(fofn_out, 'w') as f:
        f.write(data.getXml(8))
        f.write('\n')
        f.write(data.getXml(11))
        f.write('\n')
    ds1 = AlignmentSet(fofn_out, referenceFastaFname=fasta_file)
    aln_ref = None
    for aln in ds1:
        aln_ref = aln.reference()
        break
    self.assertTrue(aln_ref is not None)
    self.assertEqual(ds1.externalResources[0].reference, fasta_file)
    self.assertEqual(ds1.resourceReaders()[0].referenceFasta.filename,
                     fasta_file)
def test_split_by_contigs_with_split(self):
    """refWindows stay consistent across split(contigs=True) with
    varying chunk counts (default, 1, 24, 36); larger chunk counts
    subdivide contigs into smaller windows."""
    # test to make sure the refWindows work when chunks == # refs
    ds3 = AlignmentSet(data.getBam())
    dss = ds3.split(contigs=True)
    self.assertEqual(len(dss), 12)
    refWindows = sorted(reduce(lambda x, y: x + y,
                               [ds.refWindows for ds in dss]))
    # not all references have something mapped to them, refWindows doesn't
    # care...
    self.assertNotEqual(refWindows, sorted(ds3.refWindows))
    random_few = [('C.beijerinckii.13', 0, 1433),
                  ('B.vulgatus.4', 0, 1449),
                  ('E.faecalis.1', 0, 1482)]
    for reference in random_few:
        found = False
        for ref in refWindows:
            if ref == reference:
                found = True
        self.assertTrue(found)
    old_refWindows = refWindows

    # chunks=1 must reproduce the default windows exactly.
    dss = ds3.split(contigs=True, chunks=1)
    self.assertEqual(len(dss), 1)
    refWindows = sorted(reduce(lambda x, y: x + y,
                               [ds.refWindows for ds in dss]))
    self.assertEqual(refWindows, old_refWindows)

    # chunks=24: contigs get split into halves.
    dss = ds3.split(contigs=True, chunks=24)
    self.assertEqual(len(dss), 24)
    refWindows = sorted(reduce(lambda x, y: x + y,
                               [ds.refWindows for ds in dss]))
    random_few = [('E.faecalis.2', 0, 741),
                  ('E.faecalis.2', 741, 1482)]
    for ref in random_few:
        found = False
        for window in refWindows:
            if ref == window:
                found = True
        if not found:
            log.debug(ref)
        self.assertTrue(found)

    # chunks=36: contigs get split into thirds.
    dss = ds3.split(contigs=True, chunks=36)
    self.assertEqual(len(dss), 36)
    refWindows = sorted(reduce(lambda x, y: x + y,
                               [ds.refWindows for ds in dss]))
    random_few = [('E.faecalis.2', 0, 494),
                  ('E.faecalis.2', 494, 988),
                  ('E.faecalis.2', 988, 1482)]
    for ref in random_few:
        found = False
        for window in refWindows:
            if ref == window:
                found = True
        self.assertTrue(found)
def test_referenceInfoTable(self):
    """The merged referenceInfoTable deduplicates references shared
    between readers: 1 + 59 + 1 rows across readers collapse to 60."""
    aln = AlignmentSet(data.getBam(0), data.getBam(1), data.getBam(2))
    readers = aln.resourceReaders()
    self.assertEqual(len(readers[0].referenceInfoTable), 1)
    self.assertEqual(len(readers[1].referenceInfoTable), 59)
    self.assertEqual(len(readers[2].referenceInfoTable), 1)
    # Readers 0 and 2 share the same single reference.
    self.assertEqual(readers[0].referenceInfoTable.Name,
                     readers[2].referenceInfoTable.Name)
    self.assertEqual(len(aln.referenceInfoTable), 60)
def test_pbmerge_indexing(self):
    """consolidateXml merges a two-file AlignmentSet into one BAM with
    a .pbi index; induceIndices() reuses an existing index unless
    force=True, which regenerates it."""
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    log.info(outfn)
    consolidateXml(aln, outfn, cleanup=False)
    self.assertTrue(os.path.exists(outfn))
    self.assertTrue(os.path.exists(outfn + '.pbi'))
    cons = AlignmentSet(outfn)
    self.assertEqual(len(aln), len(cons))
    orig_stats = os.stat(outfn + '.pbi')
    cons.externalResources[0].pbi = None
    self.assertEqual(None, cons.externalResources[0].pbi)
    # test is too quick, stat times might be within the same second
    time.sleep(1)
    cons.induceIndices()
    self.assertEqual(outfn + '.pbi', cons.externalResources[0].pbi)
    # Index was reused, so the stat record is unchanged.
    self.assertEqual(orig_stats, os.stat(cons.externalResources[0].pbi))
    cons.externalResources[0].pbi = None
    self.assertEqual(None, cons.externalResources[0].pbi)
    # test is too quick, stat times might be within the same second
    time.sleep(1)
    cons.induceIndices(force=True)
    # force=True regenerates the index, so the stat record differs.
    self.assertNotEqual(orig_stats,
                        os.stat(cons.externalResources[0].pbi))
def test_alignmentset_partial_consolidate(self):
    """consolidate(numFiles=2) reduces a 3-file AlignmentSet to two
    infix-named BAMs without changing the records; the same operation
    is also exercised through the CLI."""
    testFile = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                "lambda/2372215/0007_tiny/Alignment_"
                "Results/m150404_101626_42267_c10080"
                "7920800000001823174110291514_s1_p0."
                "all.alignmentset.xml")
    aln = AlignmentSet(testFile)
    # Untouched copy for comparing records after consolidation.
    nonCons = AlignmentSet(testFile)
    assert len(aln.toExternalFiles()) == 3
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn, numFiles=2)
    # With numFiles=2, the plain name is not used; infixed names are.
    assert not os.path.exists(outfn)
    assert os.path.exists(_infixFname(outfn, "0"))
    assert os.path.exists(_infixFname(outfn, "1"))
    assert len(aln.toExternalFiles()) == 2
    assert len(nonCons.toExternalFiles()) == 3
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        assert read1 == read2
    assert len(aln) == len(nonCons)

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate --numFiles 2 {i} {d} {x}".format(
        i=testFile, d=datafile, x=xmlfile)
    log.debug(cmd)
    subprocess.check_call(cmd.split())
def test_pbmerge_indexing(self):
    """pytest variant: consolidateXml merges a two-file AlignmentSet
    into one BAM with a .pbi; induceIndices() reuses an existing index
    unless force=True, which regenerates it."""
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(11))
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    log.info(outfn)
    consolidateXml(aln, outfn, cleanup=False)
    assert os.path.exists(outfn)
    assert os.path.exists(outfn + '.pbi')
    cons = AlignmentSet(outfn)
    assert len(aln) == len(cons)
    orig_stats = os.stat(outfn + '.pbi')
    cons.externalResources[0].pbi = None
    assert cons.externalResources[0].pbi is None
    # test is too quick, stat times might be within the same second
    time.sleep(1)
    cons.induceIndices()
    assert outfn + '.pbi' == cons.externalResources[0].pbi
    # Index was reused, so the stat record is unchanged.
    assert orig_stats == os.stat(cons.externalResources[0].pbi)
    cons.externalResources[0].pbi = None
    assert cons.externalResources[0].pbi is None
    # test is too quick, stat times might be within the same second
    time.sleep(1)
    cons.induceIndices(force=True)
    # force=True regenerates the index, so the stat record differs.
    assert orig_stats != os.stat(cons.externalResources[0].pbi)
def test_alignmentset_partial_consolidate(self):
    """unittest variant: consolidate(numFiles=2) reduces a 3-file
    AlignmentSet to two infix-named BAMs without changing the records;
    also exercised through the CLI via backticks()."""
    testFile = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                "lambda/2372215/0007_tiny/Alignment_"
                "Results/m150404_101626_42267_c10080"
                "7920800000001823174110291514_s1_p0."
                "all.alignmentset.xml")
    aln = AlignmentSet(testFile)
    # Untouched copy for comparing records after consolidation.
    nonCons = AlignmentSet(testFile)
    self.assertEqual(len(aln.toExternalFiles()), 3)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn, numFiles=2)
    # With numFiles=2, the plain name is not used; infixed names are.
    self.assertFalse(os.path.exists(outfn))
    self.assertTrue(os.path.exists(_infixFname(outfn, "0")))
    self.assertTrue(os.path.exists(_infixFname(outfn, "1")))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    self.assertEqual(len(nonCons.toExternalFiles()), 3)
    for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate --numFiles 2 {i} {d} {x}".format(
        i=testFile, d=datafile, x=xmlfile)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
def test_updateCounts(self):
    """updateCounts() keeps metadata totalLength/numRecords in sync
    with the (possibly filtered) records."""
    log.info("Testing updateCounts without filters")
    aln = AlignmentSet(data.getBam(0))
    readers = aln.resourceReaders()
    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength
            # Sanity: record coordinates agree with the pbi index row.
            self.assertEqual(
                record.aStart, record.bam.pbi[record.rowNumber]['aStart'])
            self.assertEqual(
                record.aEnd, record.bam.pbi[record.rowNumber]['aEnd'])
    expNum = 0
    for reader in readers:
        expNum += len(reader)
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords
    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)

    log.info("Testing whether filters are respected")
    aln.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    aln.updateCounts()
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    def count(gen):
        # Count the items of an arbitrary generator.
        count = 0
        for _ in gen:
            count += 1
        return count

    # NOTE(review): this recomputation of expLen over the unfiltered
    # readers is dead work -- expLen is reset to 0 below before use.
    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength

    # Expected counts come from readsInRange over the filtered contig.
    bfile = openIndexedAlignmentFile(data.getBam(0))
    rWin = (bfile.referenceInfo('E.faecalis.1').ID,
            0,
            bfile.referenceInfo('E.faecalis.1').Length)
    reads = bfile.readsInRange(*rWin)
    expNum = count(reads)
    expLen = 0
    reads = bfile.readsInRange(*rWin)
    for read in reads:
        expLen += read.readLength
    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)
def test_relativize_cli(self):
    """'dataset relativize' converts absolute resource paths in a
    dataset XML into relative ones."""
    xml_path = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    dset = AlignmentSet(data.getXml(8))
    dset.copyTo(xml_path)
    self.assertFalse(_is_relative(xml_path))
    cmd = "dataset relativize {d}".format(d=xml_path)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
    self.assertTrue(os.path.exists(xml_path))
    self.assertTrue(_is_relative(xml_path))
def test_absolutize_cli_3(self):
    """'dataset absolutize --outdir' writes an absolutized copy to the
    output directory while leaving the input file relative."""
    fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    # The output keeps the input's basename, placed under outdir.
    outfn = os.path.join(outdir, os.path.split(fn)[1])
    aln = AlignmentSet(data.getXml(7))
    aln.copyTo(fn, relative=True)
    assert _is_relative(fn)
    cmd = "dataset absolutize {d} --outdir {o}".format(d=fn, o=outdir)
    self._run_cmd_with_output(cmd, fn)
    assert os.path.exists(outfn)
    # Input untouched; only the copy in outdir is absolutized.
    assert _is_relative(fn)
    assert not _is_relative(outfn)
def test_updateCounts(self):
    """Duplicate of test_updateCounts: updateCounts() keeps metadata
    totalLength/numRecords in sync with the (possibly filtered)
    records."""
    log.info("Testing updateCounts without filters")
    aln = AlignmentSet(data.getBam(0))
    readers = aln.resourceReaders()
    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength
            # Sanity: record coordinates agree with the pbi index row.
            self.assertEqual(record.aStart,
                             record.bam.pbi[record.rowNumber]['aStart'])
            self.assertEqual(record.aEnd,
                             record.bam.pbi[record.rowNumber]['aEnd'])
    expNum = 0
    for reader in readers:
        expNum += len(reader)
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords
    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)

    log.info("Testing whether filters are respected")
    aln.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    aln.updateCounts()
    accLen = aln.metadata.totalLength
    accNum = aln.metadata.numRecords

    def count(gen):
        # Count the items of an arbitrary generator.
        count = 0
        for _ in gen:
            count += 1
        return count

    # NOTE(review): this recomputation of expLen over the unfiltered
    # readers is dead work -- expLen is reset to 0 below before use.
    expLen = 0
    for reader in readers:
        for record in reader:
            expLen += record.readLength

    # Expected counts come from readsInRange over the filtered contig.
    bfile = openIndexedAlignmentFile(data.getBam(0))
    rWin = (bfile.referenceInfo('E.faecalis.1').ID,
            0,
            bfile.referenceInfo('E.faecalis.1').Length)
    reads = bfile.readsInRange(*rWin)
    expNum = count(reads)
    expLen = 0
    reads = bfile.readsInRange(*rWin)
    for read in reads:
        expLen += read.readLength
    self.assertEqual(expLen, accLen)
    self.assertEqual(expNum, accNum)
def test_newUuid_cli(self):
    """'dataset newuuid' replaces the UUID of an existing dataset."""
    xml_path = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    dset = AlignmentSet(data.getXml(8))
    dset.copyTo(xml_path)
    pre_uuid = AlignmentSet(xml_path).uuid
    cmd = "dataset newuuid {d}".format(d=xml_path)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    post_uuid = AlignmentSet(xml_path).uuid
    self.assertEqual(r, 0)
    self.assertTrue(os.path.exists(xml_path))
    self.assertNotEqual(pre_uuid, post_uuid)
def loadSharedAlignmentSet(self, alignmentFilename):
    """
    Read the input AlignmentSet so the indices can be shared with the
    slaves. This is also used to pass to ReferenceUtils for setting up
    the ipdModel object.
    """
    logging.info("Reading AlignmentSet: %s" % alignmentFilename)
    logging.info("  reference: %s" % self.args.reference)
    self.alignments = AlignmentSet(alignmentFilename,
                                   referenceFastaFname=self.args.reference)
    # XXX this should ensure that the file(s) get opened, including any
    # .pbi indices - but need to confirm this
    self.refInfo = self.alignments.referenceInfoTable
def test_nested_external_resources(self):
    """External resources can nest child resources -- reference FASTA
    for AlignmentSets; scraps/barcodes/adapters for SubreadSets -- each
    with the expected PacBio metaType."""
    log.debug("Testing nested externalResources in AlignmentSets")
    aln = AlignmentSet(data.getXml(0), skipMissing=True)
    self.assertTrue(aln.externalResources[0].pbi)
    self.assertTrue(aln.externalResources[0].reference)
    self.assertEqual(
        aln.externalResources[0].externalResources[0].metaType,
        'PacBio.ReferenceFile.ReferenceFastaFile')
    self.assertEqual(aln.externalResources[0].scraps, None)

    log.debug("Testing nested externalResources in SubreadSets")
    subs = SubreadSet(data.getXml(5), skipMissing=True)
    self.assertTrue(subs.externalResources[0].scraps)
    self.assertEqual(
        subs.externalResources[0].externalResources[0].metaType,
        'PacBio.SubreadFile.ScrapsBamFile')
    self.assertEqual(subs.externalResources[0].reference, None)

    log.debug("Testing added nested externalResoruces to SubreadSet")
    subs = SubreadSet(data.getXml(10))
    self.assertFalse(subs.externalResources[0].scraps)
    # Assigning through the property creates the nested resource with
    # the appropriate metaType.
    subs.externalResources[0].scraps = 'fake.fasta'
    self.assertTrue(subs.externalResources[0].scraps)
    self.assertEqual(
        subs.externalResources[0].externalResources[0].metaType,
        'PacBio.SubreadFile.ScrapsBamFile')
    subs.externalResources[0].barcodes = 'bc.fasta'
    self.assertTrue(subs.externalResources[0].barcodes)
    self.assertEqual(
        subs.externalResources[0].externalResources[1].metaType,
        "PacBio.DataSet.BarcodeSet")
    subs.externalResources[0].adapters = 'foo.adapters.fasta'
    self.assertEqual(subs.externalResources[0].adapters,
                     'foo.adapters.fasta')
    self.assertEqual(
        subs.externalResources[0].externalResources[2].metaType,
        "PacBio.SubreadFile.AdapterFastaFile")

    log.debug("Testing adding nested externalResources to AlignmetnSet "
              "manually")
    aln = AlignmentSet(data.getXml(8))
    self.assertTrue(aln.externalResources[0].bai)
    self.assertTrue(aln.externalResources[0].pbi)
    self.assertFalse(aln.externalResources[0].reference)
    aln.externalResources[0].reference = 'fake.fasta'
    self.assertTrue(aln.externalResources[0].reference)
    self.assertEqual(
        aln.externalResources[0].externalResources[0].metaType,
        'PacBio.ReferenceFile.ReferenceFastaFile')
def _run(self):
    """Worker main loop.

    Reopen the shared AlignmentSet (or open the inputs directly if no
    shared set was provided), then consume chunk descriptors from the
    work queue until a None sentinel arrives, pushing (chunkId, result)
    tuples onto the results queue.

    FIX: 'is not None' instead of 'not ... is None' (PEP 8); removed a
    garbled commented-out close() call.
    """
    logging.info("Worker %s (PID=%d) started running" % (self.name, self.pid))
    if self._sharedAlignmentSet is not None:
        # XXX this will create an entirely new AlignmentSet object, but
        # keeping any indices already loaded into memory
        self.caseAlignments = _reopen(self._sharedAlignmentSet)
        # Drop our reference to the shared set; the reopened copy is used.
        self._sharedAlignmentSet = None
    else:
        warnings.warn("Shared AlignmentSet not used")
        self.caseAlignments = AlignmentSet(
            self.options.infile,
            referenceFastaFname=self.options.reference)
    self.controlAlignments = None
    if self.options.control is not None:
        self.controlAlignments = AlignmentSet(
            self.options.control,
            referenceFastaFname=self.options.reference)
    if self.options.randomSeed is None:
        # NOTE(review): only the default case is seeded (fixed seed 42);
        # a user-supplied randomSeed is never applied here -- confirm.
        np.random.seed(42)
    self.onStart()

    while True:
        if self.isTerminated():
            break
        chunkDesc = self._workQueue.get()
        if chunkDesc is None:
            # Sentinel indicating end of input.  Place a sentinel
            # on the results queue and end this worker process.
            self._resultsQueue.put(None)
            self._workQueue.task_done()
            break
        else:
            (chunkId, datum) = chunkDesc
            logging.info("Got chunk: (%s, %s) -- Process: %s" %
                         (chunkId, str(datum), current_process()))
            result = self.onChunk(datum)  # pylint: disable=assignment-from-none
            logging.debug("Process %s: putting result." % current_process())
            self._resultsQueue.put((chunkId, result))
            self._workQueue.task_done()

    self.onFinish()
    logging.info("Process %s (PID=%d) done; exiting." % (self.name, self.pid))
def mainGff(options):
    """Render a text view of alignments around each variant record in a
    GFF file (python 2 entry point)."""
    reader = GffReader(options.inputGff)
    alnsFname, referenceFname = extractCmpH5AndReferenceFromGff(reader)
    # Allow overriding
    alnsFname = options.inputCmpH5 or alnsFname
    referenceFname = options.referenceFilename or referenceFname
    assert os.path.isfile(alnsFname)
    assert os.path.isfile(referenceFname)
    alnReader = AlignmentSet(alnsFname, referenceFastaFname=referenceFname)
    if options.fofn is not None:
        alnReader.attach(options.fofn)
    referenceTable = loadReferences(referenceFname, alnReader)
    for i, gffRecord in enumerate(reader):
        referenceSeq = gffRecord.get("reference", "-")
        variantSeq = gffRecord.get("variantSeq", "-")
        variantConfidence = gffRecord.confidence
        variantSummary = "(%s > %s)" % (referenceSeq, variantSeq)
        print gffRecord.type, gffRecord.seqid, gffRecord.start, \
            gffRecord.end, variantSummary, variantConfidence
        refId = gffRecord.seqid
        refLength = alnReader.referenceInfo(gffRecord.seqid).Length
        # Pad the display window by 10bp on each side of the variant.
        refWindow = makeDisplayWindow(refLength, options.width,
                                      Window(refId,
                                             gffRecord.start-10,
                                             gffRecord.end+10))
        if "rows" in gffRecord.attributes:
            # Explicit row numbers recorded in the GFF win over a
            # window query.
            alns = alnReader[map(int, gffRecord.rows.split(","))]
        else:
            alns = readsInWindow(alnReader, refWindow, options.depth,
                                 minMapQV=options.minMapQV,
                                 strategy=options.sorting)
        formatWindow(alnReader, refWindow, alns, referenceTable,
                     aligned=(gffRecord.type != "insertion"),
                     consensus=options.consensus,
                     useColor=options.color,
                     doRealign=options.realign)
        if options.pulseRecognizer:
            # CSV output for pulse recognizer
            print
            csvFname = "variant-" + str(i) + ".csv"
            dumpVariantCsv(csvFname, alnReader, alns, gffRecord)
            formatVariantCsvLink(csvFname)
        # Blank line between variant records.
        print
def test_filter_cli(self):
    """'dataset filter' on the CLI matches applying the same rname
    filter through the API (filters string, totalLength, numRecords)."""
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, "filtered8.xml")
    log.debug(outfn)
    cmd = "dataset filter {i} {o} {f}".format(
        i=data.getXml(7),
        o=outfn,
        f="rname=E.faecalis.1")
    self._run_cmd_with_output(cmd, outfn)
    # Apply the equivalent filter via the API for comparison.
    aln = AlignmentSet(data.getXml(7))
    aln.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    aln.updateCounts()
    dset = AlignmentSet(outfn)
    assert str(aln.filters) == str(dset.filters)
    assert aln.totalLength == dset.totalLength
    assert aln.numRecords == dset.numRecords
def main():
    """Scatter-plot ZMW hole X/Y positions for one or more datasets,
    optionally subsampling, and write an HTML plot via plotly.

    FIX: use '//' for the hole-number decode (same result as python 2
    integer '/' on numpy int arrays, correct under python 3 where '/'
    would yield floats), and wrap zip() in list() so random.sample
    works on python 3, where zip returns a lazy iterator.
    """
    datasets, dtype, subsampleto, title, output = parseArgs()
    d = []
    for dset in datasets:
        if dtype == 'AlignmentSet':
            f = AlignmentSet(dset)
        elif dtype == 'SubreadSet':
            f = SubreadSet(dset)
        else:
            raise ValueError('invalid dataSetType')
        # Decode (x, y) from the hole number; presumably x is the
        # high half and y the remainder modulo UINTMAX16 -- confirm.
        x = f.index['holeNumber'] // UINTMAX16
        y = f.index['holeNumber'] - x * UINTMAX16
        if len(f) > subsampleto:
            x, y = zip(*random.sample(list(zip(x, y)), subsampleto))
        h = Scatter(x=x,
                    y=y,
                    mode='markers',
                    marker=dict(size=5, opacity=0.2),
                    showlegend=False)
        d.append(h)
    layout = Layout(title=title,
                    height=600,
                    width=600,
                    xaxis=dict(title='X', range=[0, 1500]),
                    yaxis=dict(title='Y', range=[0, 1500]))
    fig = Figure(data=d, layout=layout)
    plot(fig, show_link=False, auto_open=False, filename=output)
def find_discordant_mappings(file_name, max_subread_distance=25000):
    """
    Verify that aligned subreads from the same polymerase read are
    concordant. Written as a generator to facilitate interactive use.

    Yields a human-readable message for every alignment whose tStart is
    more than *max_subread_distance* away from the first mapping seen
    for the same (movieName, HoleNumber).  Asserts that subreads of one
    polymerase read always map to the same reference contig.

    FIX: removed the unused local 'n = 0'.
    """
    # First mapping seen per polymerase read:
    # (movie, hole) -> (reference name, tStart, query name)
    mapping_dict = {}
    with AlignmentSet(file_name) as ds:
        for alignment in ds:
            read_id = (alignment.movieName, alignment.HoleNumber)
            reference_name = alignment.referenceInfo.FullName
            reference_pos = int(alignment.tStart)  # Comes as a uint
            if read_id not in mapping_dict:
                mapping_dict[read_id] = (reference_name, reference_pos,
                                         alignment.qName)
            else:
                assert reference_name == mapping_dict[read_id][0]
                delta = mapping_dict[read_id][1] - reference_pos
                msg = "non-concordant mappings for {a} and {b}: " +\
                      "delta={d} (= |{t} - {u}|)"
                if abs(delta) > max_subread_distance:
                    yield msg.format(a=mapping_dict[read_id][2],
                                     b=alignment.qName,
                                     d=delta,
                                     t=mapping_dict[read_id][1],
                                     u=alignment.tStart)
def _get_reads_info(aligned_reads_file):
    """
    Extract information from the BAM files.

    Returns a tuple of length 2. First item is a dictionary of sets, such
    that holes are mapped by cell. Second item is the instrument name.

    :param aligned_reads_file: (str) path to aligned_reads[.xml,.bam]
    :return tuple (reads_by_cell, instrument) (dict, string)
    """
    instrument = None
    holes_by_cell = defaultdict(set)
    with AlignmentSet(aligned_reads_file) as ds:
        for reader in ds.resourceReaders():
            if ds.isIndexed:
                # Indexed path: iterate raw pbi columns instead of records.
                logging.info("Indexed file - will use fast loop.")
                for hole, rg_id in zip(reader.holeNumber, reader.qId):
                    cell = movie_to_cell(reader.readGroupInfo(rg_id).MovieName)
                    if instrument is None:
                        instrument = _cell_2_inst(cell)
                    holes_by_cell[cell].add(hole)
            else:
                # Slow path: walk every alignment record.
                for record in reader:
                    cell = movie_to_cell(record.movieName)
                    if instrument is None:
                        instrument = _cell_2_inst(cell)
                    holes_by_cell[cell].add(record.HoleNumber)
    return holes_by_cell, instrument
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Entry to report.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) directory the JSON report is written into
    """
    _validate_inputs([('aligned_reads_file', aligned_reads_file),
                      ('mapping_stats_report', mapping_stats_report),
                      ('variants_report', variants_report)])
    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)
    # The dataset is opened only to read its UUID; use a context manager so
    # the underlying file handles are released (previously left open).
    with AlignmentSet(aligned_reads_file) as ds:
        dataset_uuid = ds.uuid
    rpt = Report(meta_rpt.id, dataset_uuids=(dataset_uuid, ))
    rpt.add_attribute(
        Attribute(Constants.A_INSTRUMENT, d_bam[Constants.A_INSTRUMENT]))
    rpt.add_attribute(
        Attribute(Constants.A_COVERAGE, d_var[Constants.A_COVERAGE]))
    rpt.add_attribute(
        Attribute(Constants.A_CONCORDANCE, d_var[Constants.A_CONCORDANCE]))
    rpt.add_attribute(
        Attribute(Constants.A_READLENGTH, d_map[Constants.A_READLENGTH]))
    rpt.add_attribute(Attribute(Constants.A_READS, d_bam[Constants.A_READS]))
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
def _readCmpH5Input(self):
    """
    Read the alignment input file (named by the module-level `options`)
    into an AlignmentSet and store it as self._inCmpH5.
    """
    self._inCmpH5 = AlignmentSet(options.inputFilename)
def test_add_double_bound_filters(self):
    """addRequirement OR-joins bounds; addFilter AND-joins them; removeFilter
    drops one clause.  (assertEquals -> assertEqual: the camelCase alias is
    deprecated and removed in Python 3.12.)"""
    ds1 = AlignmentSet(data.getXml(8))
    ds1.filters.addRequirement(rq=[('>', '0.85'), ('<', '0.99')])
    self.assertEqual(str(ds1.filters), '( rq > 0.85 ) OR ( rq < 0.99 )')
    ds1 = AlignmentSet(data.getXml(8))
    self.assertEqual(str(ds1.filters), '')
    ds1.filters.addFilter(rq=[('>', '0.85'), ('<', '0.99')])
    self.assertEqual(str(ds1.filters), '( rq > 0.85 AND rq < 0.99 )')
    ds1.filters.addFilter(length=[('>', '1000')])
    self.assertEqual(str(ds1.filters),
                     '( rq > 0.85 AND rq < 0.99 ) OR ( length > 1000 )')
    ds1.filters.removeFilter(0)
    self.assertEqual(str(ds1.filters), '( length > 1000 )')
def test_uuid(self):
    """newUuid() must change the UUID; a write/read round trip must keep it."""
    fresh = AlignmentSet()
    previous = fresh.uuid
    fresh.newUuid()
    self.assertNotEqual(previous, fresh.uuid)
    aln = AlignmentSet(data.getXml(no=8))
    expected_uuid = aln.uuid
    out_dir = tempfile.mkdtemp(suffix="dataset-doctest")
    out_xml = os.path.join(out_dir, 'tempfile.xml')
    aln.write(out_xml)
    reloaded = AlignmentSet(out_xml)
    self.assertEqual(reloaded.uuid, expected_uuid)
def test_uuid(self):
    """newUuid() must change the UUID; a write/read round trip must keep it."""
    ds = AlignmentSet()
    old = ds.uuid
    _ = ds.newUuid()
    # `old != ds.uuid` is the idiomatic spelling of `not old == ds.uuid`.
    assert old != ds.uuid
    aln = AlignmentSet(data.getXml(7))
    oldUuid = aln.uuid
    outdir = tempfile.mkdtemp(suffix="dataset-doctest")
    outXml = os.path.join(outdir, 'tempfile.xml')
    aln.write(outXml)
    aln = AlignmentSet(outXml)
    assert aln.uuid == oldUuid
def test_alignmentset_partial_consolidate(self):
    """Consolidating into two files must preserve every record while the
    dataset's external resources shrink from three files to two."""
    test_file = ("/mnt/secondary-siv/testdata/SA3-DS/"
                 "lambda/2372215/0007_tiny/Alignment_"
                 "Results/m150404_101626_42267_c10080"
                 "7920800000001823174110291514_s1_p0."
                 "all.alignmentset.xml")
    consolidated = AlignmentSet(test_file)
    untouched = AlignmentSet(test_file)
    self.assertEqual(len(consolidated.toExternalFiles()), 3)
    out_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    merged_bam = os.path.join(out_dir, 'merged.bam')
    consolidated.consolidate(merged_bam, numFiles=2)
    # numFiles=2 writes infixed outputs, not the bare target path.
    self.assertFalse(os.path.exists(merged_bam))
    self.assertTrue(os.path.exists(_infixFname(merged_bam, "0")))
    self.assertTrue(os.path.exists(_infixFname(merged_bam, "1")))
    self.assertEqual(len(consolidated.toExternalFiles()), 2)
    self.assertEqual(len(untouched.toExternalFiles()), 3)
    # Record content must be unchanged by consolidation.
    for rec_a, rec_b in zip(sorted(list(consolidated)),
                            sorted(list(untouched))):
        self.assertEqual(rec_a, rec_b)
    self.assertEqual(len(consolidated), len(untouched))
    log.debug("Test cli")
    out_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    data_fn = os.path.join(out_dir, "merged.bam")
    xml_fn = os.path.join(out_dir, "merged.xml")
    cmd = "dataset.py consolidate --numFiles 2 {i} {d} {x}".format(
        i=test_file, d=data_fn, x=xml_fn)
    log.debug(cmd)
    o, r, m = backticks(cmd)
    self.assertEqual(r, 0)
def to_chunked_alignmentset_files(alignmentset_path, reference_path,
                                  max_total_nchunks, chunk_key, dir_name,
                                  base_name, ext):
    """Split an AlignmentSet by contig and yield one PipelineChunk per
    written chunk file.

    :param alignmentset_path: (str) path to the input AlignmentSet XML
    :param reference_path: (str) path to the ReferenceSet (validated only)
    :param max_total_nchunks: (int) upper bound on the number of chunks
    :param chunk_key: (str) key under which each chunk file path is stored
    :param dir_name: (str) output directory for chunk files
    :param base_name: (str) prefix for chunk file names
    :param ext: (str) extension for chunk file names
    """
    dset = AlignmentSet(alignmentset_path, strict=True)
    dset_chunks = dset.split(contigs=True, maxChunks=max_total_nchunks,
                             breakContigs=True)
    # Sanity check only: opening with strict=True validates the reference
    # file; the object itself is deliberately discarded.
    ReferenceSet(reference_path, strict=True)
    d = {}
    # Loop variable renamed so it no longer shadows the outer `dset`.
    for i, chunk_dset in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        chunk_dset.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        d['$chunk.reference_id'] = reference_path
        yield PipelineChunk(chunk_id, **d)
def test_split_references(self):
    """split_references must partition a merged dataset by reference with no
    record loss, and must not mutate the source dataset."""
    lambda_bam = ('/pbi/dept/secondary/siv/testdata/SA3-RS/lambda/'
                  '2372215/0007_tiny/Alignment_Results/m150404_1016'
                  '26_42267_c100807920800000001823174110291514_s1_p'
                  '0.1.aligned.bam')
    ecoli_xml = ('/pbi/dept/secondary/siv/testdata/SA3-Sequel/ecoli/'
                 '315/3150204/r54049_20160508_152025/1_A01/Alignment'
                 '_Results/m54049_160508_155917.alignmentset.xml')
    multimovie_xml = ('/pbi/dept/secondary/siv/testdata/SA3-RS/ecoli/'
                      'tiny-multimovie/Alignment_Results/'
                      'combined.alignmentset.xml')
    n_rec_1 = len(AlignmentSet(lambda_bam))
    n_rec_2 = len(AlignmentSet(ecoli_xml))
    n_rec_3 = len(AlignmentSet(multimovie_xml))
    n_total = n_rec_1 + n_rec_2 + n_rec_3
    for count in (n_rec_1, n_rec_2, n_rec_3, n_total):
        self.assertNotEqual(count, 0)
    combined = AlignmentSet(lambda_bam, ecoli_xml, multimovie_xml)
    self.assertEqual(len(combined), n_total)
    # One chunk: everything stays together.
    split_sets = combined.split_references(1)
    self.assertEqual(len(split_sets), 1)
    self.assertEqual(sum(len(ds_) for ds_ in split_sets), n_total)
    self.assertEqual(len(combined), n_total)
    self.assertFalse(combined.filters)
    # More chunks than references: one chunk per reference.
    split_sets = combined.split_references(12)
    self.assertEqual(len(split_sets), 2)
    self.assertEqual(sum(len(ds_) for ds_ in split_sets), n_total)
    self.assertEqual(len(set(split_sets[0].index.tId)), 1)
    self.assertEqual(len(set(split_sets[-1].index.tId)), 1)
    self.assertEqual(
        split_sets[0].tid2rname[list(set(split_sets[0].index.tId))[0]],
        'ecoliK12_pbi_March2013')
    self.assertEqual(len(split_sets[0]), n_rec_2 + n_rec_3)
    self.assertEqual(
        split_sets[-1].tid2rname[list(set(split_sets[-1].index.tId))[0]],
        'lambda_NEB3011')
    self.assertEqual(len(split_sets[-1]), n_rec_1)
def test_filter_cli(self):
    """The `dataset filter` CLI must match filtering through the Python API."""
    work_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    filtered_fn = os.path.join(work_dir, "filtered8.xml")
    log.debug(filtered_fn)
    cmd = "dataset filter {i} {o} {f}".format(
        i=data.getXml(8), o=filtered_fn, f="rname=E.faecalis.1")
    log.debug(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        # Surface stderr/stdout before failing on the return code.
        log.debug(m)
    self.assertEqual(r, 0)
    self.assertTrue(os.path.exists(filtered_fn))
    # Build the expected dataset via the API and compare against CLI output.
    expected = AlignmentSet(data.getXml(8))
    expected.filters.addRequirement(rname=[('=', 'E.faecalis.1')])
    expected.updateCounts()
    produced = AlignmentSet(filtered_fn)
    self.assertEqual(str(expected.filters), str(produced.filters))
    self.assertEqual(expected.totalLength, produced.totalLength)
    self.assertEqual(expected.numRecords, produced.numRecords)
def loadSharedAlignmentSet(self, cmpH5Filename):
    """
    Read the input AlignmentSet so the indices can be shared with the
    slaves.  This is also used to pass to ReferenceUtils for setting up
    the ipdModel object.
    """
    # Lazy %-style args let logging defer the formatting work.
    logging.info("Reading AlignmentSet: %s", cmpH5Filename)
    logging.info(" reference: %s", self.args.reference)
    self.alignments = AlignmentSet(cmpH5Filename,
                                   referenceFastaFname=self.args.reference)
    # Touching referenceInfoTable should force the file(s) — including any
    # .pbi indices — to actually open; presumably sufficient, but confirm.
    self.refInfo = self.alignments.referenceInfoTable
def test_referenceInfoTableMerging(self):
    """Merged AlignmentSet must renumber reference IDs contiguously and
    expose the union of reference names from all resource readers."""
    log.info("Testing refIds, etc. after merging")
    ds = DataSet(data.getXml(17))
    also_lambda = ds.toExternalFiles()[0]
    aln = AlignmentSet(data.getBam(0), data.getBam(0), also_lambda)
    readers = aln.resourceReaders()
    ids = sorted([i for _, i in aln.refInfo('ID')])
    # list(range(...)): on Python 3 a range object never compares equal to
    # a list, so the bare `range(len(ids))` made this assertion always fail.
    self.assertEqual(list(range(len(ids))), ids)
    accNames = aln.refNames
    expNames = reduce(np.append,
                      [reader.referenceInfoTable['Name']
                       for reader in readers])
    expNames = np.unique(expNames)
    self.assertEqual(sorted(expNames), sorted(accNames))
    accNames = aln.fullRefNames
    expNames = reduce(np.append,
                      [reader.referenceInfoTable['FullName']
                       for reader in readers])
    expNames = np.unique(expNames)
    self.assertEqual(sorted(expNames), sorted(accNames))
def test_loadMetadata(self):
    """loadMetadata must populate collections from a run.metadata.xml,
    survive a write/validate round trip, and reject non-metadata input.
    (Removed the unused `stack = zip(...)` left over from debugging.)"""
    aln = AlignmentSet(data.getXml(no=8))
    self.assertFalse(aln.metadata.collections)
    aln.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                     'SA3-Sequel/lambda/roche_SAT/'
                     'm54013_151205_032353.run.metadata.xml')
    self.assertTrue(aln.metadata.collections)
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    self.assertFalse(sset.metadata.collections)
    sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                      'SA3-Sequel/lambda/roche_SAT/'
                      'm54013_151205_032353.run.metadata.xml')
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    sset.write(fn)
    validateFile(fn)
    validateFile(sset_fn)
    self.assertEqual(sset.metadata, orig_metadata)

    # load the wrong thing...
    sset_fn = ('/pbi/dept/secondary/siv/testdata/'
               'SA3-Sequel/lambda/roche_SAT/'
               'm54013_151205_032353.subreadset.xml')
    sset = SubreadSet(sset_fn)
    orig_metadata = copy.deepcopy(sset.metadata)
    sset.metadata.collections = None
    self.assertFalse(sset.metadata.collections)
    with self.assertRaises(InvalidDataSetIOError):
        sset.loadMetadata('/pbi/dept/secondary/siv/testdata/'
                          'SA3-Sequel/lambda/roche_SAT/'
                          'm54013_151205_032353.sts.xml')
def test_absolutize_cli(self):
    """`dataset absolutize` must resolve relative resource paths, either
    in place, into an explicit output file, or into an output directory."""
    def run_ok(cmd):
        # Shared CLI driver: log, run, require a zero exit code.
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)

    # Case 1: absolutize in place.
    fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    aln = AlignmentSet(data.getXml(8))
    aln.copyTo(fn, relative=True)
    self.assertTrue(_is_relative(fn))
    run_ok("dataset absolutize {d}".format(d=fn))
    self.assertTrue(os.path.exists(fn))
    self.assertFalse(_is_relative(fn))

    # Case 2: absolutize into an explicit output file; input stays relative.
    fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    outfn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    aln = AlignmentSet(data.getXml(8))
    aln.copyTo(fn, relative=True)
    self.assertTrue(_is_relative(fn))
    run_ok("dataset absolutize {d} --outdir {o}".format(d=fn, o=outfn))
    self.assertTrue(os.path.exists(fn))
    self.assertTrue(os.path.exists(outfn))
    self.assertTrue(_is_relative(fn))
    self.assertFalse(_is_relative(outfn))

    # Case 3: absolutize into an output directory; same base name expected.
    fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, os.path.split(fn)[1])
    aln = AlignmentSet(data.getXml(8))
    aln.copyTo(fn, relative=True)
    self.assertTrue(_is_relative(fn))
    run_ok("dataset absolutize {d} --outdir {o}".format(d=fn, o=outdir))
    self.assertTrue(os.path.exists(fn))
    self.assertTrue(os.path.exists(outfn))
    self.assertTrue(_is_relative(fn))
    self.assertFalse(_is_relative(outfn))
def test_write(self):
    """Write a dataset to XML, validate the file, and confirm that reading
    it back yields an equal dataset."""
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfile = os.path.join(outdir, 'tempfile.xml')
    ds1 = AlignmentSet(data.getBam())
    ds1.write(outfile)
    log.debug('Validated file: {f}'.format(f=outfile))
    validateFile(outfile)
    # Round trip: the reloaded dataset should compare equal to the original.
    ds2 = AlignmentSet(outfile)
    self.assertTrue(ds1 == ds2)
    # Should fail when strict:
    # NOTE(review): nothing here passes strict=True or wraps the write in
    # assertRaises, so this overwrite is never actually asserted to fail —
    # confirm the intended strict-mode check against the dataset API.
    ds3 = AlignmentSet(data.getBam())
    ds3.write(outfile)