def test_contigset_consolidate(self):
    """Check that consolidating a ContigSet merges a split contig.

    Two FASTA files are written in which 'B.cereus.1' appears as two
    pieces (suffixed '_10_20' and '_0_10'), each next to one unsplit
    contig.  A ContigSet over both files must hold four records before
    consolidate() and three afterwards, with the merged sequence equal
    to the '_0_10' piece followed by the '_10_20' piece.
    """
    # Local import: only this block needs shutil.
    import shutil

    # build set to merge
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")

    inFas = os.path.join(outdir, 'infile.fasta')
    outFas1 = os.path.join(outdir, 'tempfile1.fasta')
    outFas2 = os.path.join(outdir, 'tempfile2.fasta')

    # copy fasta reference to hide fai and ensure FastaReader is used.
    # shutil.copy replaces a string-built `cp` shell command, which
    # would break on paths containing spaces or shell metacharacters.
    shutil.copy(ReferenceSet(data.getXml(9)).toExternalFiles()[0], inFas)

    rs1 = ContigSet(inFas)

    singletons = ['A.baumannii.1', 'A.odontolyticus.1']
    double = 'B.cereus.1'
    # NOTE(review): `reader` is not used below; the call is kept in case
    # obtaining the resource readers matters to the lookups that follow.
    reader = rs1.resourceReaders()[0]
    exp_double = rs1.get_contig(double)
    exp_singles = [rs1.get_contig(name) for name in singletons]

    # todo: modify the names first:
    with FastaWriter(outFas1) as writer:
        writer.writeRecord(exp_singles[0])
        writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
    with FastaWriter(outFas2) as writer:
        writer.writeRecord(exp_double.name + '_0_10',
                           exp_double.sequence + 'ATCGATCGATCG')
        writer.writeRecord(exp_singles[1])

    # Expected merge: the '_0_10' piece first, then the '_10_20' piece.
    exp_double_seq = ''.join([exp_double.sequence,
                              'ATCGATCGATCG',
                              exp_double.sequence])
    exp_single_seqs = [rec.sequence for rec in exp_singles]

    acc_file = ContigSet(outFas1, outFas2)
    acc_file.induceIndices()
    log.debug(acc_file.toExternalFiles())
    self.assertEqual(len(acc_file), 4)
    self.assertEqual(len(list(acc_file)), 4)
    acc_file.consolidate()
    log.debug(acc_file.toExternalFiles())

    # open acc and compare to exp
    for name, seq in zip(singletons, exp_single_seqs):
        self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
    self.assertEqual(acc_file.get_contig(double).sequence[:],
                     exp_double_seq)

    self.assertEqual(len(acc_file._openReaders), 1)
    self.assertEqual(len(acc_file.index), 3)
    self.assertEqual(len(acc_file._indexMap), 3)
    self.assertEqual(len(acc_file), 3)
    self.assertEqual(len(list(acc_file)), 3)

    # test merge: only checks that adding the two sets does not raise
    acc1 = ContigSet(outFas1)
    acc2 = ContigSet(outFas2)
    acc3 = acc1 + acc2
def test_contigset_consolidate(self):
    """Verify ContigSet.consolidate() joins the pieces of a split contig.

    The fixture writes 'B.cereus.1' as two records ('_10_20' in the
    first file, '_0_10' in the second), each file also holding one
    unsplit contig.  The combined set is expected to go from four
    records to three on consolidation, and the merged contig to read
    as the '_0_10' sequence followed by the '_10_20' sequence.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    # build set to merge
    outdir = tempfile.mkdtemp(suffix="dataset-unittest")

    inFas = os.path.join(outdir, 'infile.fasta')
    outFas1 = os.path.join(outdir, 'tempfile1.fasta')
    outFas2 = os.path.join(outdir, 'tempfile2.fasta')

    # copy fasta reference to hide fai and ensure FastaReader is used.
    # Done with shutil.copy instead of a formatted `cp` shell string so
    # that unusual characters in either path cannot break the copy.
    source_fasta = ReferenceSet(data.getXml(9)).toExternalFiles()[0]
    shutil.copy(source_fasta, inFas)

    rs1 = ContigSet(inFas)

    singletons = ['A.baumannii.1', 'A.odontolyticus.1']
    double = 'B.cereus.1'
    # NOTE(review): `reader` is unused; call retained because removing
    # it might change which readers are open for the lookups below.
    reader = rs1.resourceReaders()[0]
    exp_double = rs1.get_contig(double)
    exp_singles = [rs1.get_contig(name) for name in singletons]

    # todo: modify the names first:
    with FastaWriter(outFas1) as writer:
        writer.writeRecord(exp_singles[0])
        writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
    with FastaWriter(outFas2) as writer:
        writer.writeRecord(exp_double.name + '_0_10',
                           exp_double.sequence + 'ATCGATCGATCG')
        writer.writeRecord(exp_singles[1])

    # The '_0_10' record (sequence + spacer) precedes the '_10_20' one.
    exp_double_seq = ''.join(
        [exp_double.sequence, 'ATCGATCGATCG', exp_double.sequence])
    exp_single_seqs = [rec.sequence for rec in exp_singles]

    acc_file = ContigSet(outFas1, outFas2)
    acc_file.induceIndices()
    log.debug(acc_file.toExternalFiles())
    self.assertEqual(len(acc_file), 4)
    self.assertEqual(len(list(acc_file)), 4)
    acc_file.consolidate()
    log.debug(acc_file.toExternalFiles())

    # open acc and compare to exp
    for name, seq in zip(singletons, exp_single_seqs):
        self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
    self.assertEqual(
        acc_file.get_contig(double).sequence[:], exp_double_seq)

    self.assertEqual(len(acc_file._openReaders), 1)
    self.assertEqual(len(acc_file.index), 3)
    self.assertEqual(len(acc_file._indexMap), 3)
    self.assertEqual(len(acc_file), 3)
    self.assertEqual(len(list(acc_file)), 3)

    # test merge: exercised only to confirm the addition does not raise
    acc1 = ContigSet(outFas1)
    acc2 = ContigSet(outFas2)
    acc3 = acc1 + acc2
def test_contigset_write(self):
    """Write every record of a ContigSet to FASTA and re-read the file.

    The lambda FASTA is opened as a ContigSet (expected to be served by
    an IndexedFastaReader), each record is passed to FastaWriter, and
    the written file is read back to confirm no sequence starts with
    '<', i.e. that no object repr was written in place of the bases.
    """
    fasta = upstreamData.getLambdaFasta()
    ds = ContigSet(fasta)
    # unittest assertions instead of bare `assert`, which is stripped
    # when Python runs with -O.
    self.assertIsInstance(ds.resourceReaders()[0], IndexedFastaReader)

    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'test.fasta')

    # Same context-manager form test_contigset_consolidate uses, so the
    # writer is not left open if writeRecord raises part-way through.
    with FastaWriter(outfn) as w:
        for rec in ds:
            w.writeRecord(rec)

    fas = FastaReader(outfn)
    for rec in fas:
        # make sure a __repr__ didn't slip through:
        self.assertFalse(rec.sequence.startswith('<'))
def test_contigset_write(self):
    """Round-trip ContigSet records through FastaWriter and FastaReader.

    Confirms the lambda FASTA ContigSet is backed by an
    IndexedFastaReader, writes all of its records to a new FASTA file,
    and checks on re-reading that no sequence begins with '<' (which
    would indicate an object repr was written instead of the sequence).
    """
    fasta = upstreamData.getLambdaFasta()
    ds = ContigSet(fasta)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(ds.resourceReaders()[0], IndexedFastaReader)

    outdir = tempfile.mkdtemp(suffix="dataset-unittest")
    outfn = os.path.join(outdir, 'test.fasta')

    # Use the writer as a context manager (as test_contigset_consolidate
    # does) so an exception in writeRecord cannot skip closing it.
    with FastaWriter(outfn) as w:
        for rec in ds:
            w.writeRecord(rec)

    fas = FastaReader(outfn)
    for rec in fas:
        # make sure a __repr__ didn't slip through:
        self.assertFalse(rec.sequence.startswith('<'))
def test_len_fastq(self):
    """Check that a ContigSet over a FASTQ file iterates every record.

    The first 24 lines of a subreads FASTQ are copied to a temporary
    file.  A ContigSet on that file must be unindexed, be read through
    a FastqReader, and yield the same number of records as a plain
    FastqReader, which is expected to be 6.
    """
    fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
          'lambda/2590980/0008/Analysis_Results/'
          'm141115_075238_ethan_c100699872550000001'
          '823139203261572_s1_p0.1.subreads.fastq')

    # Create the file with delete=False and write through that handle.
    # Taking `.name` from a discarded NamedTemporaryFile deletes the
    # file at once and leaves the name open for reuse by something else.
    with tempfile.NamedTemporaryFile(suffix=".fastq", mode='w',
                                     delete=False) as fqh:
        fq_out = fqh.name
        with open(fn, 'r') as fih:
            # 24 lines; presumably four lines per FASTQ record, matching
            # the six records asserted below.
            for line in itertools.islice(fih, 24):
                fqh.write(line)

    cset = ContigSet(fq_out)
    # unittest assertions instead of bare `assert`, which is stripped
    # when Python runs with -O.
    self.assertFalse(cset.isIndexed)
    self.assertIsInstance(cset.resourceReaders()[0], FastqReader)
    self.assertEqual(sum(1 for _ in cset),
                     sum(1 for _ in FastqReader(fq_out)))
    # Second pass over cset, as in the original test.
    self.assertEqual(sum(1 for _ in cset), 6)
def test_len_fastq(self):
    """Verify record counting for a ContigSet backed by a FASTQ file.

    Copies the first 24 lines of a subreads FASTQ into a temporary
    file, then asserts that the resulting ContigSet is not indexed,
    uses a FastqReader, and iterates the same number of records as a
    direct FastqReader on the file (expected: 6).
    """
    fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
          'lambda/2590980/0008/Analysis_Results/'
          'm141115_075238_ethan_c100699872550000001'
          '823139203261572_s1_p0.1.subreads.fastq')

    # Keep the temporary file object alive while writing (delete=False)
    # rather than reusing the `.name` of an already-discarded one: that
    # file is removed as soon as the object is closed, so its name could
    # be claimed by another process before it is reopened.
    with tempfile.NamedTemporaryFile(suffix=".fastq", mode='w',
                                     delete=False) as fqh:
        fq_out = fqh.name
        with open(fn, 'r') as fih:
            # Only the first 24 lines are needed for this check.
            for line in itertools.islice(fih, 24):
                fqh.write(line)

    cset = ContigSet(fq_out)
    self.assertFalse(cset.isIndexed)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(cset.resourceReaders()[0], FastqReader)
    self.assertEqual(sum(1 for _ in cset),
                     sum(1 for _ in FastqReader(fq_out)))
    # Second pass over cset, as in the original test.
    self.assertEqual(sum(1 for _ in cset), 6)