def test_cmscan_task_multithreaded(self, tmpdir, datadir):
    '''Check that CMScanTask reports the same e-values for any thread count.

    For each of 2, 3, 4 and 5 threads the transcript is scanned once with
    n_threads=1 and once with the multi-threaded setting, and the sorted
    `e_value` columns of the two parsed outputs must be equal.
    '''
    with tmpdir.as_cwd():
        transcript = datadir('rnaseP-bsu.fa')
        cm = datadir('rnaseP-eubact.c.cm')
        out_single = str(tmpdir.join('single'))
        out_multi = str(tmpdir.join('multi'))

        for n_threads in (2, 3, 4, 5):
            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_tasks_single = CMScanTask().task(transcript, out_single, cm,
                                                 cutoff=1.0, n_threads=1)
            aln_tasks_multi = CMScanTask().task(transcript, out_multi, cm,
                                                cutoff=1.0,
                                                n_threads=n_threads)
            run_tasks([db_task, aln_tasks_single], ['run'])
            run_task(aln_tasks_multi)

            alns_single = pd.concat(InfernalParser(out_single))
            alns_multi = pd.concat(InfernalParser(out_multi))

            # sort_values() carries the original index labels along, so the
            # two sorted Series can be labelled differently even when their
            # values agree. Drop the labels so the comparison is positional.
            evalues_single = \
                alns_single['e_value'].sort_values().reset_index(drop=True)
            evalues_multi = \
                alns_multi['e_value'].sort_values().reset_index(drop=True)
            assert all(evalues_single == evalues_multi)
def test_hmmscan_task_multithreaded(self, tmpdir, datadir):
    '''Check that HMMScanTask reports the same e-values for any thread count.

    For each of 2, 3, 4 and 5 threads the proteins are scanned once with
    n_threads=1 and once with the multi-threaded setting, and the sorted
    `domain_i_evalue` columns of the two parsed outputs must be equal.
    '''
    with tmpdir.as_cwd():
        prot = datadir('20aa-alitest.fa')
        hmm = datadir('20aa.hmm')
        out_single = str(tmpdir.join('out-single'))
        out_multi = str(tmpdir.join('out-multi'))

        for n_threads in (2, 3, 4, 5):
            db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
            aln_task_single = HMMScanTask().task(prot, out_single, hmm,
                                                 cutoff=1.0, n_threads=1)
            aln_task_multi = HMMScanTask().task(prot, out_multi, hmm,
                                                cutoff=1.0,
                                                n_threads=n_threads)
            run_tasks([db_task, aln_task_single], ['run'])
            run_tasks([aln_task_multi], ['run'])

            # Diagnostic output shown by pytest when the test fails.
            print(os.listdir(), file=sys.stderr)
            with open(out_single) as fp:
                print(fp.read())

            alns_single = pd.concat(HMMerParser(out_single))
            alns_multi = pd.concat(HMMerParser(out_multi))

            # Index labels are dropped after sorting so that the comparison
            # is positional rather than label-based.
            evalues_single = alns_single['domain_i_evalue'] \
                .sort_values().reset_index(drop=True)
            evalues_multi = alns_multi['domain_i_evalue'] \
                .sort_values().reset_index(drop=True)
            assert all(evalues_single == evalues_multi)
def test_annotate_threaded(self, tmpdir, datadir):
    '''Test the --n_threads argument.

    Runs `annotate` with two threads and requires a zero exit status.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.50.fa')

        args = ['annotate', transcripts, '--n_threads', '2']
        status, out, err = run(args)

        # Make the success criterion explicit instead of relying on run()
        # alone to signal a failed invocation.
        assert status == 0
def test_longorfs_task(self, tmpdir, datadir):
    '''Run TransDecoderLongOrfsTask and check the expected ORF is emitted.

    The task is expected to write `longest_orfs.pep` into
    `<transcript>.transdecoder_dir`; the contents of the expected peptide
    fixture must appear verbatim inside that file.
    '''
    with tmpdir.as_cwd():
        transcript = datadir('test-transcript.fa')
        exp_orf = datadir('test-transcript-orf.pep')

        task = TransDecoderLongOrfsTask().task(transcript,
                                               params=self.longorfs_cfg)
        run_tasks([task], ['run'])
        output_dir = transcript + '.transdecoder_dir'

        with open(exp_orf) as fp:
            exp_pep = fp.read()

        pep_fn = os.path.join(output_dir, 'longest_orfs.pep')
        assert os.path.isfile(pep_fn)
        with open(pep_fn) as fp:
            pep = fp.read()

        assert exp_pep in pep
def test_hmmscan_task(self, tmpdir, datadir):
    '''Run HMMPressTask followed by HMMScanTask and sanity-check the output.

    The output text must contain the word 'accession' exactly twice and
    mention the 'i-Evalue' column.
    '''
    with tmpdir.as_cwd():
        prot = datadir('test-protein.fa')
        hmm = datadir('test-profile.hmm')
        out = str(tmpdir.join('test.out'))

        db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
        aln_task = HMMScanTask().task(prot, out, hmm,
                                      cutoff=1.0, n_threads=1)
        run_tasks([db_task, aln_task], ['run'])

        # Diagnostic output shown by pytest when the test fails.
        print(os.listdir(), file=sys.stderr)
        with open(out) as fp:
            aln = fp.read()
        print(aln)

        assert aln.count('accession') == 2
        assert 'i-Evalue' in aln
def test_annotate_full(self, tmpdir, datadir):
    '''Run a full annotation and verify the results.

    The GFF3 written to `<transcripts>.dammit` is checked with compare_gff
    and the FASTA output must equal the expected fixture exactly.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.50.fa')
        exp_gff3 = datadir('pom.50.fa.dammit.gff3.full')
        exp_fasta = datadir('pom.50.fa.dammit.fasta.full')

        args = ['annotate', transcripts, '--full']
        status, out, err = run(args)

        outdir = '{0}.dammit'.format(transcripts)
        gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
        fasta_fn = os.path.join(outdir, 'pom.50.fa.dammit.fasta')

        assert compare_gff(gff3_fn, exp_gff3)

        with open(fasta_fn) as fp:
            observed_fasta = fp.read()
        with open(exp_fasta) as fp:
            expected_fasta = fp.read()
        assert observed_fasta == expected_fasta
def test_annotate_evalue(self, tmpdir, datadir):
    '''Test the --evalue argument and verify the results.

    Annotates with `--evalue 10.0`; the GFF3 output is checked with
    compare_gff and the FASTA output must equal the expected fixture.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.50.fa')
        exp_gff3 = datadir('pom.50.fa.dammit.gff3.evalue10')
        exp_fasta = datadir('pom.50.fa.dammit.fasta.evalue10')

        args = ['annotate', transcripts, '--evalue', '10.0']
        status, out, err = run(args)

        outdir = '{0}.dammit'.format(transcripts)
        gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
        fasta_fn = os.path.join(outdir, 'pom.50.fa.dammit.fasta')

        assert compare_gff(gff3_fn, exp_gff3)

        with open(fasta_fn) as fp:
            observed_fasta = fp.read()
        with open(exp_fasta) as fp:
            expected_fasta = fp.read()
        assert observed_fasta == expected_fasta
def test_annotate_multiple_user_databases(self, tmpdir, datadir):
    '''Annotate with two user databases and require a clean exit.

    Only the exit status is checked; the expected GFF3/FASTA fixtures are
    requested but not compared against the output.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('pom.50.fa')
        first_db = datadir('pep.fa')
        second_db = datadir('odb_subset.fa')
        # NOTE(review): these fixtures are fetched but never used below;
        # the calls are kept in case datadir() has side effects -- confirm.
        expected_gff3 = datadir('pom.50.fa.dammit.gff3.udb')
        expected_fasta = datadir('pom.50.fa.dammit.fasta.udb')

        cli = ['annotate', '--quick', fasta]
        cli += ['--user-databases', first_db, second_db]
        cli += ['--verbosity', '2']
        status, out, err = run(cli)

        result_dir = '{0}.dammit'.format(fasta)

        assert status == 0
def test_annotate_dbdir(self, tmpdir, datadir):
    '''Test that --database-dir works.

    The database directory is read from the DAMMIT_DB_DIR environment
    variable (a KeyError is raised if it is unset) and the annotation
    must exit with status zero.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.50.fa')

        db_dir = os.environ['DAMMIT_DB_DIR']
        args = ['annotate', '--quick', transcripts, '--database-dir', db_dir]
        status, out, err = run(args)

        # Make the success criterion explicit instead of relying on run()
        # alone to signal a failed invocation.
        assert status == 0
def test_annotate_dbdir_fail(self, tmpdir, datadir):
    '''Annotating against a directory without databases must fail.

    The current directory is passed as --database-dir; the captured `out`
    text must contain the install hint and the exit status must be 2.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('pom.50.fa')

        # fail_ok=True lets the failing invocation return instead of
        # aborting the test.
        status, out, err = run(['annotate', fasta, '--database-dir', '.'],
                               fail_ok=True)

        assert 'install databases to continue' in out
        assert status == 2
def test_annotate_outdir(self, tmpdir, datadir):
    '''Check that -o sends the annotation results to the named directory.

    After the run, a file named like the input transcripts must exist
    inside the requested output directory.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('pom.50.fa')
        target_dir = 'test_out'

        status, out, err = run(['annotate', '--quick', fasta,
                                '-o', target_dir])

        expected_fn = os.path.join(target_dir, os.path.basename(fasta))
        assert os.path.isfile(expected_fn)
def test_annotate_basic(self, tmpdir, datadir):
    '''Run a basic annotation and verify the results.

    The GFF3 written to `<transcripts>.dammit` is checked with compare_gff
    and the FASTA output must equal the expected fixture exactly.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.single.fa')
        exp_gff3 = datadir('pom.single.fa.dammit.gff3')
        exp_fasta = datadir('pom.single.fa.dammit.fasta')

        args = ['annotate', transcripts]
        status, out, err = run(args)

        outdir = '{0}.dammit'.format(transcripts)
        gff3_fn = os.path.join(outdir, 'pom.single.fa.dammit.gff3')
        fasta_fn = os.path.join(outdir, 'pom.single.fa.dammit.fasta')

        # Diagnostic output shown by pytest when the test fails.
        print(os.listdir(outdir))
        print(gff3_fn, fasta_fn)

        assert compare_gff(gff3_fn, exp_gff3)

        with open(fasta_fn) as fp:
            observed_fasta = fp.read()
        with open(exp_fasta) as fp:
            expected_fasta = fp.read()
        assert observed_fasta == expected_fasta
def test_cmscan_task(self, tmpdir, datadir):
    '''Run CMPressTask followed by CMScanTask and sanity-check the output.

    The output text must contain the word 'accession' exactly twice and
    mention the 'E-value' column.
    '''
    with tmpdir.as_cwd():
        transcript = datadir('test-transcript.fa')
        cm = datadir('test-covariance-model.cm')
        out = str(tmpdir.join('test.out'))

        db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
        aln_task = CMScanTask().task(transcript, out, cm,
                                     cutoff=1.0, n_threads=1)
        run_tasks([db_task, aln_task], ['run'])

        # Diagnostic output shown by pytest when the test fails.
        print(os.listdir(), file=sys.stderr)
        with open(out) as fp:
            aln = fp.read()
        print(aln, file=sys.stderr)

        # TODO: better correctness check
        assert aln.count('accession') == 2
        assert 'E-value' in aln
def test_annotate_no_rename(self, tmpdir, datadir):
    '''Test the --no-rename argument.

    The run must succeed, the transcript file in the output directory must
    still contain the original 'SPAC212' identifiers, and the GFF3 output
    is checked with compare_gff.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.50.fa')
        exp_gff3 = datadir('pom.50.fa.dammit.gff3.norename')
        # NOTE(review): this fixture is fetched but never compared below;
        # the call is kept in case datadir() has side effects -- confirm.
        exp_fasta = datadir('pom.50.fa.dammit.fasta.norename')

        args = ['annotate', transcripts, '--no-rename']
        status, out, err = run(args)
        assert status == 0

        outdir = '{0}.dammit'.format(transcripts)
        fn = os.path.join(outdir, os.path.basename(transcripts))
        assert os.path.isfile(fn)

        with open(fn) as fp:
            contents = fp.read()
        assert 'SPAC212' in contents

        gff3_fn = os.path.join(outdir, 'pom.50.fa.dammit.gff3')
        assert compare_gff(gff3_fn, exp_gff3)
def test_annotate_user_databases(self, tmpdir, datadir):
    '''Test that a user database works.

    Annotates with one `--user-databases` entry; the run must exit with
    status zero, the GFF3 output is checked with compare_gff and the FASTA
    output must equal the expected fixture exactly.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.single.fa')
        pep = datadir('pep.fa')
        exp_gff3 = datadir('pom.single.fa.dammit.gff3.udb')
        exp_fasta = datadir('pom.single.fa.dammit.fasta.udb')

        args = ['annotate', transcripts,
                '--user-databases', pep,
                '--verbosity', '2']
        status, out, err = run(args)

        outdir = '{0}.dammit'.format(transcripts)
        gff3_fn = os.path.join(outdir, 'pom.single.fa.dammit.gff3')
        fasta_fn = os.path.join(outdir, 'pom.single.fa.dammit.fasta')

        assert status == 0
        assert compare_gff(gff3_fn, exp_gff3)

        with open(fasta_fn) as fp:
            observed_fasta = fp.read()
        with open(exp_fasta) as fp:
            expected_fasta = fp.read()
        assert observed_fasta == expected_fasta
def test_cmpress_task(self, tmpdir, datadir):
    '''Run CMPressTask and check that its index files were produced.

    For every suffix in `self.extensions` the file `<model><suffix>` must
    exist, and the task status must read 'up-to-date' after the run.
    '''
    with tmpdir.as_cwd():
        model = datadir('test-covariance-model.cm')
        press_task = CMPressTask().task(model, params=self.cmpress_cfg)
        run_tasks([press_task], ['run'])
        task_status = check_status(press_task)

        # Diagnostic output shown by pytest when the test fails.
        print(os.listdir(), file=sys.stderr)

        for suffix in self.extensions:
            index_fn = model + suffix
            assert os.path.isfile(index_fn)

        assert task_status.status == 'up-to-date'
def test_hmmpress_task_existing(self, tmpdir, datadir):
    '''Check HMMPressTask when its output files already exist.

    A file is touched for every suffix in `self.extensions` before the
    task runs; afterwards the task status must read 'up-to-date'.
    '''
    with tmpdir.as_cwd():
        profile = datadir('test-profile.hmm')

        # Create the output files up front so the task finds them in place.
        for suffix in self.extensions:
            touch(profile + suffix)

        press_task = HMMPressTask().task(profile)
        run_tasks([press_task], ['run'])

        # Diagnostic output shown by pytest when the test fails.
        print(os.listdir(), file=sys.stderr)
        print(press_task, file=sys.stderr)

        task_status = check_status(press_task)
        assert task_status.status == 'up-to-date'
def test_predict_task(self, tmpdir, datadir):
    '''Run the TransDecoder LongOrfs and Predict tasks and check outputs.

    For every suffix in `self.extensions` the file
    `<transcript>.transdecoder<suffix>` must exist and be readable; the
    '.gff3' output must additionally mention each expected feature type.
    '''
    with tmpdir.as_cwd():
        transcript = datadir('pom.50.fa')
        pfam = datadir('test-protein-x-pfam-a.tbl')

        orf_task = TransDecoderLongOrfsTask().task(transcript,
                                                   params=self.longorfs_cfg)
        pred_task = TransDecoderPredictTask().task(transcript,
                                                   pfam,
                                                   params=self.predict_cfg)
        run_tasks([orf_task, pred_task], ['run'])

        # Diagnostic output shown by pytest when the test fails.
        pprint(tmpdir.listdir())

        for ext in self.extensions:
            fn = transcript + '.transdecoder' + ext
            assert os.path.isfile(fn)
            # Every output is read, even though only the GFF3 contents are
            # inspected, so an unreadable file still fails the test.
            with open(fn) as fp:
                contents = fp.read()
            if ext == '.gff3':
                assert 'mRNA' in contents
                assert 'gene' in contents
                assert 'CDS' in contents
                assert 'three_prime_UTR' in contents
                assert 'exon' in contents
def test_annotate_no_rename(self, tmpdir, datadir):
    '''Test the --no_rename argument.

    The transcript file in the output directory must still contain the
    original 'SPAC212' identifiers and the run must exit with status zero.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.single.fa')

        args = ['annotate', transcripts, '--no_rename']
        status, out, err = run(args)

        outdir = '{0}.dammit'.format(transcripts)
        fn = os.path.join(outdir, os.path.basename(transcripts))
        assert os.path.isfile(fn)

        with open(fn) as fp:
            contents = fp.read()
        assert 'SPAC212' in contents
        assert status == 0