コード例 #1
0
    def test_contigset_consolidate(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
                      i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
                      o=inFas))
        rs1 = ContigSet(inFas)

        singletons = ['A.baumannii.1', 'A.odontolyticus.1']
        double = 'B.cereus.1'
        reader = rs1.resourceReaders()[0]
        exp_double = rs1.get_contig(double)
        exp_singles = [rs1.get_contig(name) for name in singletons]

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord(exp_singles[0])
            writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord(exp_double.name + '_0_10',
                               exp_double.sequence + 'ATCGATCGATCG')
            writer.writeRecord(exp_singles[1])

        exp_double_seq = ''.join([exp_double.sequence,
                                  'ATCGATCGATCG',
                                  exp_double.sequence])
        exp_single_seqs = [rec.sequence for rec in exp_singles]

        acc_file = ContigSet(outFas1, outFas2)
        acc_file.induceIndices()
        log.debug(acc_file.toExternalFiles())
        self.assertEqual(len(acc_file), 4)
        self.assertEqual(len(list(acc_file)), 4)
        acc_file.consolidate()
        log.debug(acc_file.toExternalFiles())

        # open acc and compare to exp
        for name, seq in zip(singletons, exp_single_seqs):
            self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
        self.assertEqual(acc_file.get_contig(double).sequence[:],
                         exp_double_seq)

        self.assertEqual(len(acc_file._openReaders), 1)
        self.assertEqual(len(acc_file.index), 3)
        self.assertEqual(len(acc_file._indexMap), 3)
        self.assertEqual(len(acc_file), 3)
        self.assertEqual(len(list(acc_file)), 3)

        # test merge:
        acc1 = ContigSet(outFas1)
        acc2 = ContigSet(outFas2)
        acc3 = acc1 + acc2
コード例 #2
0
    def test_contigset_consolidate(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(i=ReferenceSet(
            data.getXml(9)).toExternalFiles()[0],
                                      o=inFas))
        rs1 = ContigSet(inFas)

        singletons = ['A.baumannii.1', 'A.odontolyticus.1']
        double = 'B.cereus.1'
        reader = rs1.resourceReaders()[0]
        exp_double = rs1.get_contig(double)
        exp_singles = [rs1.get_contig(name) for name in singletons]

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord(exp_singles[0])
            writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord(exp_double.name + '_0_10',
                               exp_double.sequence + 'ATCGATCGATCG')
            writer.writeRecord(exp_singles[1])

        exp_double_seq = ''.join(
            [exp_double.sequence, 'ATCGATCGATCG', exp_double.sequence])
        exp_single_seqs = [rec.sequence for rec in exp_singles]

        acc_file = ContigSet(outFas1, outFas2)
        acc_file.induceIndices()
        log.debug(acc_file.toExternalFiles())
        self.assertEqual(len(acc_file), 4)
        self.assertEqual(len(list(acc_file)), 4)
        acc_file.consolidate()
        log.debug(acc_file.toExternalFiles())

        # open acc and compare to exp
        for name, seq in zip(singletons, exp_single_seqs):
            self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
        self.assertEqual(
            acc_file.get_contig(double).sequence[:], exp_double_seq)

        self.assertEqual(len(acc_file._openReaders), 1)
        self.assertEqual(len(acc_file.index), 3)
        self.assertEqual(len(acc_file._indexMap), 3)
        self.assertEqual(len(acc_file), 3)
        self.assertEqual(len(list(acc_file)), 3)

        # test merge:
        acc1 = ContigSet(outFas1)
        acc2 = ContigSet(outFas2)
        acc3 = acc1 + acc2
コード例 #3
0
 def test_contigset_write(self):
     fasta = upstreamData.getLambdaFasta()
     ds = ContigSet(fasta)
     assert isinstance(ds.resourceReaders()[0], IndexedFastaReader)
     outdir = tempfile.mkdtemp(suffix="dataset-unittest")
     outfn = os.path.join(outdir, 'test.fasta')
     w = FastaWriter(outfn)
     for rec in ds:
         w.writeRecord(rec)
     w.close()
     fas = FastaReader(outfn)
     for rec in fas:
         # make sure a __repr__ didn't slip through:
         assert not rec.sequence.startswith('<')
コード例 #4
0
 def test_contigset_write(self):
     fasta = upstreamData.getLambdaFasta()
     ds = ContigSet(fasta)
     self.assertTrue(isinstance(ds.resourceReaders()[0],
                                IndexedFastaReader))
     outdir = tempfile.mkdtemp(suffix="dataset-unittest")
     outfn = os.path.join(outdir, 'test.fasta')
     w = FastaWriter(outfn)
     for rec in ds:
         w.writeRecord(rec)
     w.close()
     fas = FastaReader(outfn)
     for rec in fas:
         # make sure a __repr__ didn't slip through:
         self.assertFalse(rec.sequence.startswith('<'))
コード例 #5
0
 def test_len_fastq(self):
     fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
           'lambda/2590980/0008/Analysis_Results/'
           'm141115_075238_ethan_c100699872550000001'
           '823139203261572_s1_p0.1.subreads.fastq')
     fq_out = tempfile.NamedTemporaryFile(suffix=".fastq").name
     with open(fq_out, 'w') as fqh:
         with open(fn, 'r') as fih:
             for line in itertools.islice(fih, 24):
                 fqh.write(line)
     cset = ContigSet(fq_out)
     assert not cset.isIndexed
     assert isinstance(cset.resourceReaders()[0], FastqReader)
     assert sum(1 for _ in cset) == sum(1 for _ in FastqReader(fq_out))
     assert sum(1 for _ in cset) == 6
コード例 #6
0
 def test_len_fastq(self):
     fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
           'lambda/2590980/0008/Analysis_Results/'
           'm141115_075238_ethan_c100699872550000001'
           '823139203261572_s1_p0.1.subreads.fastq')
     fq_out = tempfile.NamedTemporaryFile(suffix=".fastq").name
     with open(fq_out, 'w') as fqh:
         with open(fn, 'r') as fih:
             for line in itertools.islice(fih, 24):
                 fqh.write(line)
     cset = ContigSet(fq_out)
     self.assertFalse(cset.isIndexed)
     self.assertTrue(isinstance(cset.resourceReaders()[0], FastqReader))
     self.assertEqual(sum(1 for _ in cset),
                      sum(1 for _ in FastqReader(fq_out)))
     self.assertEqual(sum(1 for _ in cset), 6)