def test_readFasta(self):
    """Read the sample FASTA and verify the record count and first record.

    Checks that iterating the reader yields 48 records and that the first
    record carries the expected header and sequence.
    """
    expected_header = "ref000001|EGFR_Exon_2"
    expected_sequence = (
        "TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTT"
        "TGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCC"
        "TTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTA"
        "AAGGTTGGTGACTTTGATTTTCCT"
    )

    reader = FastaReader(data.getFasta())
    records = list(reader)

    # Check the count before indexing so that an empty result fails on the
    # length assertion rather than on the lookup.
    assert_equal(48, len(records))
    first_record = records[0]
    assert_equal(expected_header, first_record.header)
    assert_equal(expected_sequence, first_record.sequence)
def test_readFasta(self):
    """Read the sample FASTA and verify the record count and first record.

    Checks that iterating the reader yields 48 records and that the first
    record carries the expected name, sequence and md5 value.
    """
    expected_name = "ref000001|EGFR_Exon_2"
    expected_sequence = (
        "TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTT"
        "TGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCC"
        "TTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTA"
        "AAGGTTGGTGACTTTGATTTTCCT"
    )
    expected_md5 = "e3912e9ceacd6538ede8c1b2adda7423"

    reader = FastaReader(data.getFasta())
    records = list(reader)

    # Check the count before indexing so that an empty result fails on the
    # length assertion rather than on the lookup.
    assert_equal(48, len(records))
    first_record = records[0]
    assert_equal(expected_name, first_record.name)
    assert_equal(expected_sequence, first_record.sequence)
    assert_equal(expected_md5, first_record.md5)
def test_merged_contigset(self):
    """Merge two FASTA inputs into one ContigSet and round-trip it via XML.

    The merged set must report 49 records, both by iteration and by
    ``len``, before consolidation, after consolidating and writing, and
    again when the written file is reopened.
    """
    # Only the generated name is kept; the NamedTemporaryFile object itself
    # is not retained.
    # NOTE(review): presumably this is just a way to obtain a unique path
    # for ``cset.write`` -- confirm the file is expected not to pre-exist.
    fn = tempfile.NamedTemporaryFile(suffix=".contigset.xml").name

    with ContigSet(upstreamData.getLambdaFasta(),
                   upstreamData.getFasta()) as cset:
        self.assertEqual(len(list(cset)), 49)
        self.assertEqual(len(cset), 49)
        cset.consolidate()
        # Log the destination before writing so the path is still recorded
        # when the write itself fails.
        log.debug("Writing to {f}".format(f=fn))
        cset.write(fn)
        self.assertEqual(len(list(cset)), 49)
        self.assertEqual(len(cset), 49)

    # Reopen what was written and make sure nothing was lost.
    with ContigSet(fn) as cset:
        self.assertEqual(len(list(cset)), 49)
        self.assertEqual(len(cset), 49)
def setup(self):
    """Store the result of ``data.getFasta()`` on the instance as ``fastaPath``."""
    fasta_path = data.getFasta()
    self.fastaPath = fasta_path
def test_alignmentset_consolidate(self):
    """Check that consolidating a two-file AlignmentSet preserves its reads.

    Every scenario loads a fresh ``AlignmentSet(data.getXml(12))`` that
    starts with two external files, consolidates it into a single
    ``merged.bam`` and compares the reads with an unconsolidated dataset:

    * ``consolidateBams`` called directly, with ``useTmp`` off and on;
    * ``AlignmentSet.consolidate`` with no filter, followed by writing the
      consolidated dataset to XML;
    * ``consolidate`` with an ``rname`` filter and with an ``accuracy``
      filter;
    * ``consolidate`` with a reference set on one, then on all, external
      resources.
    """

    def new_merged_path():
        # Every scenario writes into its own fresh temporary directory.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        return os.path.join(outdir, 'merged.bam')

    def assert_same_reads(left, right):
        # Pairwise comparison in sorted order.  zip() stops at the shorter
        # input, so callers also compare the lengths separately.
        for read1, read2 in zip(sorted(left), sorted(right)):
            self.assertEqual(read1, read2)

    def check_filtered(expected, name, condition):
        # Consolidate a filtered dataset and compare it with an identically
        # filtered, unconsolidated one.  A new requirement list is built for
        # each addRequirement call so the two datasets never share it.
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(list(aln)), 177)
        aln.filters.addRequirement(**{name: [condition]})
        self.assertEqual(len(list(aln)), expected)
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = new_merged_path()
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        nonCons = AlignmentSet(data.getXml(12))
        nonCons.filters.addRequirement(**{name: [condition]})
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        assert_same_reads(aln, nonCons)
        self.assertEqual(len(list(aln)), len(list(nonCons)))

    def check_with_reference(aln, nonCons, reference):
        # Consolidate ``aln`` and verify that the reads and the reference of
        # the first external resource are unchanged.
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = new_merged_path()
        aln.consolidate(outfn)
        self.assertTrue(os.path.exists(outfn))
        self.assertEqual(len(aln.toExternalFiles()), 1)
        self.assertEqual(len(nonCons.toExternalFiles()), 2)
        assert_same_reads(aln, nonCons)
        self.assertEqual(len(aln), len(nonCons))
        self.assertEqual(aln.externalResources[0].reference, reference)

    direct_cases = (
        ("Test methods directly", False),
        ("Test methods directly in tmp", True),
    )
    for label, use_tmp in direct_cases:
        log.debug(label)
        aln = AlignmentSet(data.getXml(12))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        outfn = new_merged_path()
        consolidateBams(aln.toExternalFiles(), outfn, filterDset=aln,
                        useTmp=use_tmp)
        self.assertTrue(os.path.exists(outfn))
        consAln = AlignmentSet(outfn)
        self.assertEqual(len(consAln.toExternalFiles()), 1)
        assert_same_reads(aln, consAln)
        self.assertEqual(len(aln), len(consAln))

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(12))
    self.assertEqual(len(aln.toExternalFiles()), 2)
    outfn = new_merged_path()
    aln.consolidate(outfn)
    self.assertTrue(os.path.exists(outfn))
    self.assertEqual(len(aln.toExternalFiles()), 1)
    nonCons = AlignmentSet(data.getXml(12))
    self.assertEqual(len(nonCons.toExternalFiles()), 2)
    assert_same_reads(aln, nonCons)
    self.assertEqual(len(aln), len(nonCons))

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    check_filtered(7, 'rname', ('=', 'B.vulgatus.5'))

    log.debug("Test with not refname filter")
    # Disabled scenario: consolidating with an rname '!=' filter.
    # This isn't trivial with bamtools, so nothing is executed here.

    log.debug("Test with expensive filter")
    check_filtered(174, 'accuracy', ('>', '.85'))

    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    nonCons = aln.copy()
    check_with_reference(aln, nonCons, reference)

    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(12))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    # ``nonCons`` is deliberately still the unconsolidated copy made in the
    # one-reference scenario above; it is not rebuilt here.
    check_with_reference(aln, nonCons, reference)
def test_alignmentset_consolidate(self):
    """Check that consolidating a two-file AlignmentSet preserves its reads.

    Every scenario loads a fresh ``AlignmentSet(data.getXml(11))`` that
    starts with two external files, consolidates it into a single
    ``merged.bam`` and compares the reads with an unconsolidated dataset:

    * ``AlignmentSet.consolidate`` with no filter, followed by writing the
      consolidated dataset to XML;
    * ``consolidate`` with an ``rname`` filter and with an ``accuracy``
      filter;
    * ``consolidate`` with a reference set on one, then on all, external
      resources.
    """

    def new_merged_path():
        # Every scenario writes into its own fresh temporary directory.
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        return os.path.join(outdir, 'merged.bam')

    def assert_same_reads(left, right):
        # Pairwise comparison in sorted order.  zip() stops at the shorter
        # input, so callers also compare the lengths separately.
        for read1, read2 in zip(sorted(left), sorted(right)):
            assert read1 == read2

    def check_filtered(expected, name, condition):
        # Consolidate a filtered dataset and compare it with an identically
        # filtered, unconsolidated one.  A new requirement list is built for
        # each addRequirement call so the two datasets never share it.
        aln = AlignmentSet(data.getXml(11))
        assert len(list(aln)) == 177
        aln.filters.addRequirement(**{name: [condition]})
        assert len(list(aln)) == expected
        assert len(aln.toExternalFiles()) == 2
        outfn = new_merged_path()
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        nonCons = AlignmentSet(data.getXml(11))
        nonCons.filters.addRequirement(**{name: [condition]})
        assert len(nonCons.toExternalFiles()) == 2
        assert_same_reads(aln, nonCons)
        assert len(list(aln)) == len(list(nonCons))

    def check_with_reference(aln, nonCons, reference):
        # Consolidate ``aln`` and verify that the reads and the reference of
        # the first external resource are unchanged.
        assert len(aln.toExternalFiles()) == 2
        outfn = new_merged_path()
        aln.consolidate(outfn)
        assert os.path.exists(outfn)
        assert len(aln.toExternalFiles()) == 1
        assert len(nonCons.toExternalFiles()) == 2
        assert_same_reads(aln, nonCons)
        assert len(aln) == len(nonCons)
        assert aln.externalResources[0].reference == reference

    log.debug("Test through API")
    aln = AlignmentSet(data.getXml(11))
    assert len(aln.toExternalFiles()) == 2
    outfn = new_merged_path()
    aln.consolidate(outfn)
    assert os.path.exists(outfn)
    assert len(aln.toExternalFiles()) == 1
    nonCons = AlignmentSet(data.getXml(11))
    assert len(nonCons.toExternalFiles()) == 2
    assert_same_reads(aln, nonCons)
    assert len(aln) == len(nonCons)

    # Test that it is a valid xml:
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    xmlfile = os.path.join(outdir, "apimerged.xml")
    log.debug(xmlfile)
    aln.write(xmlfile)

    log.debug("Test with cheap filter")
    check_filtered(7, 'rname', ('=', 'B.vulgatus.5'))

    log.debug("Test with not refname filter")
    # Disabled scenario: consolidating with an rname '!=' filter.
    # This isn't trivial with bamtools, so nothing is executed here.

    log.debug("Test with expensive filter")
    check_filtered(174, 'accuracy', ('>', '.85'))

    log.debug("Test with one reference")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    aln.externalResources[0].reference = reference
    nonCons = aln.copy()
    check_with_reference(aln, nonCons, reference)

    log.debug("Test with two references")
    aln = AlignmentSet(data.getXml(11))
    reference = upstreamData.getFasta()
    for extRes in aln.externalResources:
        extRes.reference = reference
    # ``nonCons`` is deliberately still the unconsolidated copy made in the
    # one-reference scenario above; it is not rebuilt here.
    check_with_reference(aln, nonCons, reference)
class TestIndexedFastaReader:
    """Tests for IndexedFastaReader: iteration, lookup, slicing, odd inputs."""

    FASTAPATH = data.getFasta()

    def _assert_is_exon_22(self, record):
        # Shared expectations for the record "ref000021|EGFR_Exon_22",
        # regardless of how it was looked up.
        assert record.header == "ref000021|EGFR_Exon_22\tMetadataTest"
        assert record.id == "ref000021|EGFR_Exon_22"
        assert record.comment == "MetadataTest"
        assert ("CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT"
                "TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA"
                "TGCTAATGAGTTTGTACTGAGGCCAAGCTGG") == record.sequence[:]

    def testIteration(self):
        """Indexed and plain readers yield the same 48 records."""
        indexed = IndexedFastaReader(self.FASTAPATH)
        plain = FastaReader(self.FASTAPATH)
        indexed_records = list(indexed)
        plain_records = list(plain)

        assert len(plain_records) == len(indexed_records)
        assert len(indexed_records) == 48
        for indexed_rec, plain_rec in zip(indexed_records, plain_records):
            assert plain_rec.header == indexed_rec.header
            assert plain_rec.sequence == indexed_rec.sequence[:]

        # Unlike FastaReader, IndexedFastaReader iteration is repeatable.
        assert len(list(indexed)) == 48

    def testAccessByName(self):
        """A record can be fetched by its full header."""
        reader = IndexedFastaReader(self.FASTAPATH)
        self._assert_is_exon_22(reader["ref000021|EGFR_Exon_22\tMetadataTest"])

    def testAccessById(self):
        """A record can be fetched by its id alone."""
        reader = IndexedFastaReader(self.FASTAPATH)
        self._assert_is_exon_22(reader["ref000021|EGFR_Exon_22"])

    def testAccessByPosition(self):
        """Integer and slice indexing on the reader agree with each other."""
        reader = IndexedFastaReader(self.FASTAPATH)
        head = reader[0]
        assert repr(head) == "<IndexedFastaRecord: ref000001|EGFR_Exon_2>"

        leading_pair = reader[:2]
        assert [reader[0], reader[1]] == leading_pair

        trailing_pair = reader[-2:]
        assert [reader[-2], reader[-1]] == trailing_pair

    def testSlice(self):
        """Slices and single indices of a record's sequence give known bases."""
        reader = IndexedFastaReader(self.FASTAPATH)
        sequence = reader["ref000021|EGFR_Exon_22"].sequence

        expectations = (
            (slice(0, 10), "CACTGCCTCA"),
            (slice(-10, None), "GCCAAGCTGG"),
            (-1, "G"),
            (-3, "T"),
            (0, "C"),
            (1, "A"),
        )
        for index, expected in expectations:
            assert expected == sequence[index]

    def test_dosLineEndingsFasta(self):
        """Both readers agree on the DOS-formatted FASTA file."""
        plain_entries = list(FastaReader(data.getDosFormattedFasta()))
        indexed_entries = list(IndexedFastaReader(data.getDosFormattedFasta()))

        assert len(plain_entries) == len(indexed_entries)
        for plain_entry, indexed_entry in zip(plain_entries, indexed_entries):
            assert plain_entry.header == indexed_entry.header
            assert plain_entry.sequence == indexed_entry.sequence[:]

    def test_readWeirdFastaIndex(self):
        """The "weird" sample file yields exactly one record, ``chr1``."""
        reader = IndexedFastaReader(data.getWeird())
        records = list(reader)

        assert len(records) == 1
        only_record = records[0]
        assert only_record.header == "chr1"
        assert "acgtacgtacgtact" == only_record.sequence[:]
def _run_and_validate(args, file_name):
    """Run the command, then check every external resource's reference.

    The output is loaded as a strict AlignmentSet and each of its external
    resources must reference ``otherdata.getFasta()``.

    NOTE(review): ``self``, ``cmd`` and ``ofn`` are not defined in this
    function, and ``args`` / ``file_name`` are not used by it; presumably
    the free names come from an enclosing scope -- confirm.
    """
    self._run_cmd_with_output(cmd, ofn)
    dataset = AlignmentSet(ofn, strict=True)
    for resource in dataset.externalResources:
        assert resource.reference == otherdata.getFasta()