def test_alignmentset_partial_consolidate(self):
    """Partially consolidate a three-file AlignmentSet down to two files.

    Exercises ``AlignmentSet.consolidate(..., numFiles=2)`` directly, then
    runs the ``dataset consolidate --numFiles 2`` command line on the same
    input and checks that it exits with status 0.
    """
    xml_path = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                "lambda/2372215/0007_tiny/Alignment_"
                "Results/m150404_101626_42267_c10080"
                "7920800000001823174110291514_s1_p0."
                "all.alignmentset.xml")
    merged_set = AlignmentSet(xml_path)
    pristine_set = AlignmentSet(xml_path)
    self.assertEqual(len(merged_set.toExternalFiles()), 3)

    work_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    target = os.path.join(work_dir, 'merged.bam')
    merged_set.consolidate(target, numFiles=2)

    # With numFiles=2 the bare target name is expected to be absent; only
    # the two infixed names should exist on disk.
    self.assertFalse(os.path.exists(target))
    for infix in ("0", "1"):
        self.assertTrue(os.path.exists(_infixFname(target, infix)))
    self.assertEqual(len(merged_set.toExternalFiles()), 2)
    self.assertEqual(len(pristine_set.toExternalFiles()), 3)

    # Compare the sorted records pairwise; the length check afterwards
    # catches anything zip() would silently truncate.
    for merged_read, pristine_read in zip(sorted(merged_set),
                                          sorted(pristine_set)):
        self.assertEqual(merged_read, pristine_read)
    self.assertEqual(len(merged_set), len(pristine_set))

    log.debug("Test cli")
    cli_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    cli_bam = os.path.join(cli_dir, "merged.bam")
    cli_xml = os.path.join(cli_dir, "merged.xml")
    cmd = " ".join(
        ["dataset consolidate --numFiles 2", xml_path, cli_bam, cli_xml])
    log.debug(cmd)
    # Only the middle element of the backticks() result is checked here.
    _first, status, _third = backticks(cmd)
    self.assertEqual(status, 0)
def test_alignmentset_partial_consolidate(self):
    """Check a 3 -> 2 file partial consolidation through the API and CLI.

    The AlignmentSet is consolidated with ``numFiles=2`` and compared read
    by read against an untouched copy loaded from the same XML. The
    ``dataset.py consolidate --numFiles 2`` command is then run on the
    same XML and must exit with status 0.
    """
    source_xml = ("/mnt/secondary-siv/testdata/SA3-DS/"
                  "lambda/2372215/0007_tiny/Alignment_"
                  "Results/m150404_101626_42267_c10080"
                  "7920800000001823174110291514_s1_p0."
                  "all.alignmentset.xml")
    consolidated = AlignmentSet(source_xml)
    baseline = AlignmentSet(source_xml)
    self.assertEqual(len(consolidated.toExternalFiles()), 3)

    scratch = tempfile.mkdtemp(suffix="dataset-unittest")
    bam_path = os.path.join(scratch, 'merged.bam')
    consolidated.consolidate(bam_path, numFiles=2)

    # The un-infixed name must not exist; the "0" and "1" variants must.
    self.assertFalse(os.path.exists(bam_path))
    first_part = _infixFname(bam_path, "0")
    self.assertTrue(os.path.exists(first_part))
    second_part = _infixFname(bam_path, "1")
    self.assertTrue(os.path.exists(second_part))

    self.assertEqual(len(consolidated.toExternalFiles()), 2)
    self.assertEqual(len(baseline.toExternalFiles()), 3)

    consolidated_reads = sorted(consolidated)
    baseline_reads = sorted(baseline)
    for got, expected in zip(consolidated_reads, baseline_reads):
        self.assertEqual(got, expected)
    self.assertEqual(len(consolidated), len(baseline))

    log.debug("Test cli")
    scratch = tempfile.mkdtemp(suffix="dataset-unittest")
    out_bam = os.path.join(scratch, "merged.bam")
    out_xml = os.path.join(scratch, "merged.xml")
    template = "dataset.py consolidate --numFiles 2 {i} {d} {x}"
    cmd = template.format(i=source_xml, d=out_bam, x=out_xml)
    log.debug(cmd)
    # Only the middle element of the backticks() result is asserted on.
    result = backticks(cmd)
    _first, exit_status, _third = result
    self.assertEqual(exit_status, 0)
def test_alignmentset_partial_consolidate(self):
    """Partially consolidate a three-file AlignmentSet into two files.

    First drives ``AlignmentSet.consolidate(..., numFiles=2)`` and checks
    the files on disk, the external-file counts and the reads against an
    untouched copy of the same dataset. Then runs the equivalent
    ``dataset consolidate --numFiles 2`` command; a non-zero exit raises
    ``subprocess.CalledProcessError``.
    """
    testFile = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                "lambda/2372215/0007_tiny/Alignment_"
                "Results/m150404_101626_42267_c10080"
                "7920800000001823174110291514_s1_p0."
                "all.alignmentset.xml")
    aln = AlignmentSet(testFile)
    nonCons = AlignmentSet(testFile)
    assert len(aln.toExternalFiles()) == 3

    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn, numFiles=2)

    # With numFiles=2 the bare output name is expected to be absent; only
    # the "0" and "1" infixed names should exist.
    assert not os.path.exists(outfn)
    assert os.path.exists(_infixFname(outfn, "0"))
    assert os.path.exists(_infixFname(outfn, "1"))
    assert len(aln.toExternalFiles()) == 2
    assert len(nonCons.toExternalFiles()) == 3

    # zip() stops at the shorter input, so the length assertion that
    # follows is what guards against dropped reads.
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(aln) == len(nonCons)

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    # Build argv as a list instead of formatting a string and splitting it
    # on whitespace, so a path containing spaces stays a single argument.
    argv = ["dataset", "consolidate", "--numFiles", "2",
            testFile, datafile, xmlfile]
    log.debug(" ".join(argv))
    subprocess.check_call(argv)
def test_alignmentset_partial_consolidate(self):
    """Consolidate a three-file AlignmentSet into two files via the API.

    After ``consolidate(..., numFiles=2)`` the set must reference two
    external files and yield the same sorted reads as an untouched copy
    loaded from the same XML.
    """
    dataset_xml = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                   "lambda/2372215/0007_tiny/Alignment_"
                   "Results/m150404_101626_42267_c10080"
                   "7920800000001823174110291514_s1_p0."
                   "all.alignmentset.xml")
    squashed = AlignmentSet(dataset_xml)
    original = AlignmentSet(dataset_xml)
    self.assertEqual(len(squashed.toExternalFiles()), 3)

    tmp_dir = tempfile.mkdtemp(suffix="dataset-unittest")
    bam_target = os.path.join(tmp_dir, 'merged.bam')
    squashed.consolidate(bam_target, numFiles=2)

    # Only the infixed outputs should exist, not the bare target name.
    self.assertFalse(os.path.exists(bam_target))
    for part in ("0", "1"):
        self.assertTrue(os.path.exists(_infixFname(bam_target, part)))

    self.assertEqual(len(squashed.toExternalFiles()), 2)
    self.assertEqual(len(original.toExternalFiles()), 3)

    squashed_reads = sorted(squashed)
    original_reads = sorted(original)
    for left, right in zip(squashed_reads, original_reads):
        self.assertEqual(left, right)
    self.assertEqual(len(squashed), len(original))
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM along several paths.

    Covers ``consolidateBams`` called directly (``useTmp`` False and True),
    ``AlignmentSet.consolidate``, writing the consolidated set to XML, and
    consolidation under an ``rname`` filter and an ``accuracy`` filter. In
    each case the sorted reads of the consolidated set are compared
    pairwise with those of the unconsolidated source.
    """
    # The two direct invocations differ only in ``useTmp``.
    for use_tmp, label in ((False, "Test methods directly"),
                           (True, "Test methods directly in tmp")):
        log.debug(label)
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                        useTmp=use_tmp)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        for read1, read2 in zip(sorted(aln), sorted(consAln)):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(consAln))

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here mirrored the cheap-filter case above with '!=' in place of
    # '='. Its expected filtered count (7) was copied from that case and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM via the API.

    Sections, in order: plain consolidation, writing the consolidated set
    to XML, consolidation under an ``rname`` filter, under an ``accuracy``
    filter, and with a reference attached to one and then to every
    external resource. Each consolidation is checked by comparing the
    sorted reads pairwise with an unconsolidated counterpart.
    """
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here mirrored the cheap-filter case above with '!=' in place of
    # '='. Its expected filtered count (7) was copied from that case and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    # The comparison set is a copy taken before consolidation rather than
    # a fresh load, so it carries the reference assignment too.
    nonCons = aln.copy()
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    self.assertEqual(aln.externalResources[0].reference, reference)

    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    # NOTE(review): nonCons is not rebuilt in this section; it is still
    # the copy made in the one-reference section above. Confirm whether a
    # fresh ``aln.copy()`` taken before consolidate() was intended.
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))
    self.assertEqual(aln.externalResources[0].reference, reference)
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM along several paths.

    Covers ``consolidateBams`` called directly, ``AlignmentSet.consolidate``
    with no filter, with an ``rname`` filter and with an ``accuracy``
    filter, and finally the ``dataset.py consolidate`` command line, which
    must exit with status 0.
    """
    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    for read1, read2 in zip(sorted(aln), sorted(consAln)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(consAln))

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))

    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here mirrored the cheap-filter case above with '!=' in place of
    # '='. Its expected filtered count (7) was copied from that case and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset.py consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                      d=datafile,
                                                      x=xmlfile)
    log.debug(cmd)
    # Only the middle element of the backticks() result is checked; the
    # other two are deliberately ignored.
    _first, r, _third = backticks(cmd)
    self.assertEqual(r, 0)
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM along several paths.

    Covers ``consolidateBams`` called directly, ``AlignmentSet.consolidate``
    with no filter (plus writing the result to XML), with an ``rname``
    filter and with an ``accuracy`` filter, and finally the
    ``dataset consolidate`` command line, which must exit with status 0.
    """
    log.debug("Test methods directly")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln)
    self.assertTrue(os.path.exists(outfn))
    consAln = AlignmentSet(outfn)
    self.assertEqual(len(consAln.toExternalFiles()), 1)
    for read1, read2 in zip(sorted(aln), sorted(consAln)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(consAln))

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(aln), len(nonCons))

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(list(aln)), 7)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here mirrored the cheap-filter case above with '!=' in place of
    # '='. Its expected filtered count (7) was copied from that case and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(list(aln)), 177)
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(list(aln)), 174)
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        self.assertEqual(read1, read2)
    self.assertEqual(len(list(aln)), len(list(nonCons)))

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    cmd = "dataset consolidate {i} {d} {x}".format(i=data.getXml(12),
                                                   d=datafile,
                                                   x=xmlfile)
    log.debug(cmd)
    # Only the middle element of the backticks() result is checked; the
    # other two are deliberately ignored.
    _first, r, _third = backticks(cmd)
    self.assertEqual(r, 0)
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM via the API.

    Sections, in order: plain consolidation, writing the consolidated set
    to XML, consolidation under an ``rname`` filter, under an ``accuracy``
    filter, and with a reference attached to one and then to every
    external resource. Each consolidation is checked by comparing the
    sorted reads pairwise with an unconsolidated counterpart.
    """
    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(11))
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(aln) == len(nonCons)

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    assert len(list(aln)) == 7
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(rname=[('=', 'B.vulgatus.5')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here mirrored the cheap-filter case above with '!=' in place of
    # '='. Its expected filtered count (7) was copied from that case and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(list(aln)) == 174
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))

    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    # The comparison set is a copy taken before consolidation rather than
    # a fresh load, so it carries the reference assignment too.
    nonCons = aln.copy()
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    assert aln.externalResources[0].reference == reference

    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    # NOTE(review): nonCons is not rebuilt in this section; it is still
    # the copy made in the one-reference section above. Confirm whether a
    # fresh ``aln.copy()`` taken before consolidate() was intended.
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(aln) == len(nonCons)
    assert aln.externalResources[0].reference == reference
def test_alignmentset_consolidate(self):
    """Consolidate a two-file AlignmentSet into one BAM via API and CLI.

    Sections, in order: plain consolidation, writing the consolidated set
    to XML, consolidation under a ``length`` filter, under an ``accuracy``
    filter, and the ``dataset consolidate`` command line (a non-zero exit
    raises ``subprocess.CalledProcessError``).

    NOTE(review): the first sections load ``pbtestdata``'s "aligned-ds-2"
    while the accuracy-filter and CLI sections load ``data.getXml(11)``;
    confirm the mix of inputs is intentional.
    """
    log.debug("Test through API")
    aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(aln) == len(nonCons)

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    aln = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    assert len(list(aln)) == 21
    aln.filters.addRequirement(length=[(">=", 10000)])
    assert len(list(aln)) == 10
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(pbtestdata.get_file("aligned-ds-2"))
    nonCons.filters.addRequirement(length=[(">=", 10000)])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))

    log.debug("Test with not refname filter")
    # TODO: consolidation under a negated reference-name filter
    # (rname != 'B.vulgatus.5') is not exercised; per the original note,
    # "This isn't trivial with bamtools". The disabled body that used to
    # sit here consolidated data.getXml(11) under that filter and compared
    # its reads with an unconsolidated, identically filtered copy. Its
    # expected filtered count (7) looks copied from an '=' filter and
    # would need recomputing before the check is re-enabled.

    log.debug("Test with expensive filter")
    aln = AlignmentSet(data.getXml(11))
    assert len(list(aln)) == 177
    aln.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(list(aln)) == 174
    assert len(aln.toExternalFiles()) == 2
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'merged.bam')
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    nonCons.filters.addRequirement(accuracy=[('>', '.85')])
    assert len(nonCons.toExternalFiles()) == 2
    for read1, read2 in zip(sorted(aln), sorted(nonCons)):
        assert read1 == read2
    assert len(list(aln)) == len(list(nonCons))

    log.debug("Test cli")
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    datafile = os.path.join(outdir, "merged.bam")
    xmlfile = os.path.join(outdir, "merged.xml")
    # Build argv as a list instead of formatting a string and splitting it
    # on whitespace, so a path containing spaces stays a single argument.
    argv = ["dataset", "consolidate", str(data.getXml(11)),
            datafile, xmlfile]
    log.debug(" ".join(argv))
    subprocess.check_call(argv)