Exemple #1
0
    def test_cmscan_task_multithreaded(self, tmpdir, datadir):
        '''Check that multithreaded cmscan agrees with a single-threaded run.

        For each thread count in (2, 3, 4, 5), runs CMScanTask once with
        n_threads=1 and once with that thread count, parses both outputs
        with InfernalParser, and compares the sorted e_value columns.
        '''
        with tmpdir.as_cwd():
            transcript = datadir('rnaseP-bsu.fa')
            cm = datadir('rnaseP-eubact.c.cm')
            out_single = str(tmpdir.join('single'))
            out_multi = str(tmpdir.join('multi'))

            for n_threads in (2, 3, 4, 5):

                db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
                aln_tasks_single = CMScanTask().task(transcript,
                                                     out_single,
                                                     cm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_tasks_multi = CMScanTask().task(transcript,
                                                    out_multi,
                                                    cm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_tasks_single], ['run'])
                run_task(aln_tasks_multi)

                alns_single = pd.concat(InfernalParser(out_single))
                alns_multi = pd.concat(InfernalParser(out_multi))

                # sort_values() keeps each row's original label, so the two
                # sorted Series can carry their labels in different orders.
                # Drop the labels so the comparison is purely positional.
                evalues_single = (alns_single['e_value']
                                  .sort_values()
                                  .reset_index(drop=True))
                evalues_multi = (alns_multi['e_value']
                                 .sort_values()
                                 .reset_index(drop=True))

                # A differing hit count should fail as a plain assertion
                # rather than as an error raised from the comparison.
                assert len(evalues_single) == len(evalues_multi)
                assert all(evalues_single == evalues_multi)
Exemple #2
0
    def test_hmmscan_task_multithreaded(self, tmpdir, datadir):
        '''Check that multithreaded hmmscan agrees with a single-threaded run.

        For each thread count in (2, 3, 4, 5), runs HMMScanTask once with
        n_threads=1 and once with that thread count, parses both outputs
        with HMMerParser, and compares the sorted domain_i_evalue columns.
        '''
        with tmpdir.as_cwd():
            prot = datadir('20aa-alitest.fa')
            hmm = datadir('20aa.hmm')
            out_single = str(tmpdir.join('out-single'))
            out_multi = str(tmpdir.join('out-multi'))

            for n_threads in (2, 3, 4, 5):
                db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
                aln_task_single = HMMScanTask().task(prot,
                                                     out_single,
                                                     hmm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_task_multi = HMMScanTask().task(prot,
                                                    out_multi,
                                                    hmm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_task_single], ['run'])
                run_tasks([aln_task_multi], ['run'])
                print(os.listdir(), file=sys.stderr)

                # Dump the single-threaded output as a debugging aid; the
                # context manager closes the handle on every iteration.
                with open(out_single) as fp:
                    print(fp.read())

                alns_single = pd.concat(HMMerParser(out_single))
                alns_multi = pd.concat(HMMerParser(out_multi))

                # Discard the row labels after sorting so the comparison is
                # positional.
                evalues_single = (alns_single['domain_i_evalue']
                                  .sort_values()
                                  .reset_index(drop=True))
                evalues_multi = (alns_multi['domain_i_evalue']
                                 .sort_values()
                                 .reset_index(drop=True))
                assert all(evalues_single == evalues_multi)
Exemple #3
0
    def test_annotate_threaded(self, tmpdir, datadir):
        '''Test the --n_threads argument.

        Runs `annotate` with two threads and checks the exit status.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')
            args = ['annotate', transcripts, '--n_threads', '2']
            status, out, err = run(args)

            # Without an assertion the test only checks that run() returns.
            assert status == 0
    def test_longorfs_task(self, tmpdir, datadir):
        '''Run TransDecoderLongOrfsTask and check the predicted ORFs.

        The expected peptide file's contents must appear inside the
        `longest_orfs.pep` file written to `<transcript>.transdecoder_dir`.
        '''
        with tmpdir.as_cwd():
            transcript = datadir('test-transcript.fa')
            exp_orf = datadir('test-transcript-orf.pep')

            task = TransDecoderLongOrfsTask().task(transcript,
                                                   params=self.longorfs_cfg)
            run_tasks([task], ['run'])
            output_dir = transcript + '.transdecoder_dir'

            # Context managers close the handles instead of leaving them
            # open until garbage collection.
            with open(exp_orf) as fp:
                exp_pep = fp.read()

            pep_fn = os.path.join(output_dir, 'longest_orfs.pep')
            assert os.path.isfile(pep_fn)
            with open(pep_fn) as fp:
                pep = fp.read()

            assert exp_pep in pep
Exemple #5
0
    def test_hmmscan_task(self, tmpdir, datadir):
        '''Run HMMPressTask followed by HMMScanTask and check the output.

        The output file must contain the word 'accession' exactly twice and
        must mention 'i-Evalue'.
        '''
        with tmpdir.as_cwd():
            prot = datadir('test-protein.fa')
            hmm = datadir('test-profile.hmm')
            out = str(tmpdir.join('test.out'))

            db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
            aln_task = HMMScanTask().task(prot, out, hmm,
                                          cutoff=1.0, n_threads=1)

            run_tasks([db_task, aln_task], ['run'])
            print(os.listdir(), file=sys.stderr)

            # Read through a context manager so the handle is closed.
            with open(out) as fp:
                aln = fp.read()
            print(aln)

            assert aln.count('accession') == 2
            assert 'i-Evalue' in aln
Exemple #6
0
    def test_annotate_full(self, tmpdir, datadir):
        '''Run a full annotation and verify the results.

        Compares the produced GFF3 (via compare_gff) and FASTA (exact text
        match) against the expected `.full` fixtures.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')
            exp_gff3 = datadir('pom.50.fa.dammit.gff3.full')
            exp_fasta = datadir('pom.50.fa.dammit.fasta.full')

            args = ['annotate', transcripts, '--full']
            status, out, err = run(args)

            outdir = '{0}.dammit'.format(transcripts)
            gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
            fasta_fn = os.path.join(outdir, 'pom.50.fa.dammit.fasta')

            assert compare_gff(gff3_fn, exp_gff3)

            # Close both handles deterministically rather than leaking them.
            with open(fasta_fn) as obs_fp, open(exp_fasta) as exp_fp:
                assert obs_fp.read() == exp_fp.read()
Exemple #7
0
    def test_annotate_evalue(self, tmpdir, datadir):
        '''Test the --evalue argument and verify the results.

        Runs `annotate` with `--evalue 10.0` and compares the produced GFF3
        (via compare_gff) and FASTA (exact text match) against the expected
        `.evalue10` fixtures.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')
            exp_gff3 = datadir('pom.50.fa.dammit.gff3.evalue10')
            exp_fasta = datadir('pom.50.fa.dammit.fasta.evalue10')

            args = ['annotate', transcripts, '--evalue', '10.0']
            status, out, err = run(args)

            outdir = '{0}.dammit'.format(transcripts)
            gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
            fasta_fn = os.path.join(outdir, 'pom.50.fa.dammit.fasta')

            assert compare_gff(gff3_fn, exp_gff3)

            # Close both handles deterministically rather than leaking them.
            with open(fasta_fn) as obs_fp, open(exp_fasta) as exp_fp:
                assert obs_fp.read() == exp_fp.read()
Exemple #8
0
    def test_annotate_multiple_user_databases(self, tmpdir, datadir):
        '''Test that multiple user databases work.

        Passes two peptide FASTA files to `--user-databases` and checks that
        the run exits with status 0.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')
            pep = datadir('pep.fa')
            pep2 = datadir('odb_subset.fa')
            # NOTE(review): the expected outputs are fetched but never
            # compared. The calls are kept because datadir() may stage the
            # files as a side effect -- confirm, then compare or remove.
            exp_gff3 = datadir('pom.50.fa.dammit.gff3.udb')
            exp_fasta = datadir('pom.50.fa.dammit.fasta.udb')

            args = ['annotate', '--quick',
                    transcripts, '--user-databases', pep, pep2,
                    '--verbosity', '2']
            status, out, err = run(args)

            assert status == 0
Exemple #9
0
    def test_annotate_dbdir(self, tmpdir, datadir):
        '''Test that --database-dir works.

        Reads the database directory from the DAMMIT_DB_DIR environment
        variable (a KeyError is raised if it is unset) and checks that a
        quick annotation against it exits with status 0.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')

            db_dir = os.environ['DAMMIT_DB_DIR']
            args = ['annotate', '--quick', transcripts, '--database-dir', db_dir]
            status, out, err = run(args)

            # Without an assertion the test only checks that run() returns.
            assert status == 0
Exemple #10
0
    def test_annotate_dbdir_fail(self, tmpdir, datadir):
        '''Annotating against a faulty database directory must fail.

        Points --database-dir at the current directory and expects exit
        status 2 together with the install hint in the captured `out` text.
        '''

        with tmpdir.as_cwd():
            fasta = datadir('pom.50.fa')
            cmd = ['annotate', fasta, '--database-dir', '.']

            # fail_ok=True is passed because a non-zero exit is expected.
            status, out, err = run(cmd, fail_ok=True)

            assert 'install databases to continue' in out
            assert status == 2
Exemple #11
0
    def test_annotate_outdir(self, tmpdir, datadir):
        '''Check that `-o` sends the annotation output to the given directory.

        After a quick annotation, a file named like the input transcripts
        must exist inside the requested output directory.
        '''

        with tmpdir.as_cwd():
            fasta = datadir('pom.50.fa')
            target_dir = 'test_out'

            status, out, err = run(
                ['annotate', '--quick', fasta, '-o', target_dir])

            expected_fn = os.path.join(target_dir, os.path.basename(fasta))
            assert os.path.isfile(expected_fn)
Exemple #12
0
    def test_annotate_basic(self, tmpdir, datadir):
        '''Run a basic annotation and verify the results.

        Compares the produced GFF3 (via compare_gff) and FASTA (exact text
        match) against the expected fixtures for `pom.single.fa`.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.single.fa')
            exp_gff3 = datadir('pom.single.fa.dammit.gff3')
            exp_fasta = datadir('pom.single.fa.dammit.fasta')

            args = ['annotate', transcripts]
            status, out, err = run(args)

            outdir = '{0}.dammit'.format(transcripts)
            gff3_fn = os.path.join(outdir, 'pom.single.fa.dammit.gff3')
            fasta_fn = os.path.join(outdir, 'pom.single.fa.dammit.fasta')

            # Debugging aids: show what the run produced and where we look.
            print(os.listdir(outdir))
            print(gff3_fn, fasta_fn)
            assert compare_gff(gff3_fn, exp_gff3)

            # Close both handles deterministically rather than leaking them.
            with open(fasta_fn) as obs_fp, open(exp_fasta) as exp_fp:
                assert obs_fp.read() == exp_fp.read()
Exemple #13
0
    def test_cmscan_task(self, tmpdir, datadir):
        '''Run CMPressTask followed by CMScanTask and check the output.

        The output file must contain the word 'accession' exactly twice and
        must mention 'E-value'.
        '''
        with tmpdir.as_cwd():
            transcript = datadir('test-transcript.fa')
            cm = datadir('test-covariance-model.cm')
            out = str(tmpdir.join('test.out'))

            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_task = CMScanTask().task(transcript,
                                         out,
                                         cm,
                                         cutoff=1.0,
                                         n_threads=1)
            run_tasks([db_task, aln_task], ['run'])
            print(os.listdir(), file=sys.stderr)

            # read() yields the same text as joining readlines(), and the
            # context manager closes the handle.
            with open(out) as fp:
                aln = fp.read()
            print(aln, file=sys.stderr)

            # TODO: better correctness check
            assert aln.count('accession') == 2
            assert 'E-value' in aln
Exemple #14
0
    def test_annotate_no_rename(self, tmpdir, datadir):
        '''Test the --no-rename argument.

        After a successful run, the copy of the transcripts in the output
        directory must still contain the original 'SPAC212' identifiers, and
        the produced GFF3 must match the `.norename` fixture.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.50.fa')
            exp_gff3 = datadir('pom.50.fa.dammit.gff3.norename')
            # NOTE(review): fetched but never compared below. The call is
            # kept because datadir() may stage the file -- confirm.
            exp_fasta = datadir('pom.50.fa.dammit.fasta.norename')

            args = ['annotate', transcripts, '--no-rename']
            status, out, err = run(args)
            assert status == 0

            outdir = '{0}.dammit'.format(transcripts)
            fn = os.path.join(outdir, os.path.basename(transcripts))
            assert os.path.isfile(fn)

            # Read through a context manager so the handle is closed.
            with open(fn) as fp:
                contents = fp.read()
            assert 'SPAC212' in contents

            gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
            assert compare_gff(gff3_fn, exp_gff3)
Exemple #15
0
    def test_annotate_user_databases(self, tmpdir, datadir):
        '''Test that a user database works.

        Passes one peptide FASTA file to `--user-databases`, then checks the
        exit status and compares the produced GFF3 (via compare_gff) and
        FASTA (exact text match) against the expected `.udb` fixtures.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.single.fa')
            pep = datadir('pep.fa')
            exp_gff3 = datadir('pom.single.fa.dammit.gff3.udb')
            exp_fasta = datadir('pom.single.fa.dammit.fasta.udb')

            args = ['annotate', transcripts, '--user-databases', pep,
                    '--verbosity', '2']
            status, out, err = run(args)

            # Check the exit status first so a failed run is reported as
            # such rather than as a missing or mismatched output file.
            assert status == 0

            outdir = '{0}.dammit'.format(transcripts)
            gff3_fn = os.path.join(outdir, 'pom.single.fa.dammit.gff3')
            fasta_fn = os.path.join(outdir, 'pom.single.fa.dammit.fasta')

            assert compare_gff(gff3_fn, exp_gff3)

            # Close both handles deterministically rather than leaking them.
            with open(fasta_fn) as obs_fp, open(exp_fasta) as exp_fp:
                assert obs_fp.read() == exp_fp.read()
Exemple #16
0
    def test_cmpress_task(self, tmpdir, datadir):
        '''Run CMPressTask on the test covariance model.

        A file must exist for every suffix in `self.extensions`, and the
        task status must read 'up-to-date' after the run.
        '''
        with tmpdir.as_cwd():
            model_fn = datadir('test-covariance-model.cm')
            press_task = CMPressTask().task(model_fn, params=self.cmpress_cfg)

            run_tasks([press_task], ['run'])
            task_status = check_status(press_task)
            print(os.listdir(), file=sys.stderr)

            expected_files = [model_fn + suffix for suffix in self.extensions]
            for path in expected_files:
                assert os.path.isfile(path)

            assert task_status.status == 'up-to-date'
Exemple #17
0
    def test_hmmpress_task_existing(self, tmpdir, datadir):
        '''Run HMMPressTask when its output files already exist.

        A file is touched for every suffix in `self.extensions` before the
        task is built; the status must read 'up-to-date' after the run.
        '''
        with tmpdir.as_cwd():
            profile_fn = datadir('test-profile.hmm')

            # Create the outputs up front so the task finds them in place.
            for suffix in self.extensions:
                touch(profile_fn + suffix)

            press_task = HMMPressTask().task(profile_fn)
            run_tasks([press_task], ['run'])

            print(os.listdir(), file=sys.stderr)
            print(press_task, file=sys.stderr)

            task_status = check_status(press_task)
            assert task_status.status == 'up-to-date'
    def test_predict_task(self, tmpdir, datadir):
        '''Run the TransDecoder LongOrfs and Predict tasks and check outputs.

        A `<transcript>.transdecoder<ext>` file must exist for every suffix
        in `self.extensions`; the `.gff3` output must also mention each of
        the expected feature types.
        '''
        with tmpdir.as_cwd():
            transcript = datadir('pom.50.fa')
            pfam = datadir('test-protein-x-pfam-a.tbl')

            orf_task = TransDecoderLongOrfsTask().task(
                transcript, params=self.longorfs_cfg)
            pred_task = TransDecoderPredictTask().task(transcript,
                                                       pfam,
                                                       params=self.predict_cfg)
            run_tasks([orf_task, pred_task], ['run'])

            pprint(tmpdir.listdir())
            for ext in self.extensions:
                fn = transcript + '.transdecoder' + ext
                assert os.path.isfile(fn)

                # Every output is opened and read, as before, but the handle
                # is now closed on each iteration instead of leaked.
                with open(fn) as fp:
                    contents = fp.read()

                if ext == '.gff3':
                    assert 'mRNA' in contents
                    assert 'gene' in contents
                    assert 'CDS' in contents
                    assert 'three_prime_UTR' in contents
                    assert 'exon' in contents
Exemple #19
0
    def test_annotate_no_rename(self, tmpdir, datadir):
        '''Test the --no_rename argument.

        After a successful run, the copy of the transcripts in the output
        directory must still contain the original 'SPAC212' identifier.
        '''

        with tmpdir.as_cwd():
            transcripts = datadir('pom.single.fa')

            args = ['annotate', transcripts, '--no_rename']
            status, out, err = run(args)

            # Check the exit status first so a failed run is reported as
            # such rather than as a missing output file.
            assert status == 0

            outdir = '{0}.dammit'.format(transcripts)
            fn = os.path.join(outdir, os.path.basename(transcripts))
            assert os.path.isfile(fn)

            # Read through a context manager so the handle is closed.
            with open(fn) as fp:
                contents = fp.read()
            assert 'SPAC212' in contents