def test_lastal_task_multithreaded(tmpdir, datadir):
    '''Check that a multi-threaded alignment yields the same E-values as the
    default (no n_threads argument) alignment.

    For each thread count in (3, 4, 5) the same transcript/protein pair is
    aligned twice, once with and once without `n_threads`, and the sorted
    'E' columns of the two parsed outputs are compared.
    '''
    with tmpdir.as_cwd():
        for n_threads in (3, 4, 5):
            prot = datadir('test-protein.fa')
            tr = datadir('pom.50.fa')
            out_single = tmpdir.join('out-single').strpath
            out_multi = tmpdir.join('out-multi').strpath

            db_task = lastdb_task(prot, prot)
            aln_task_single = lastal_task(tr, prot, out_single,
                                          translate=True,
                                          cutoff=None)
            aln_task_multi = lastal_task(tr, prot, out_multi,
                                         translate=True,
                                         cutoff=None,
                                         n_threads=n_threads)
            # NOTE(review): both output paths are reused on every iteration;
            # confirm the task runner actually re-runs the alignment for each
            # thread count instead of treating the target as up to date.
            run_tasks([db_task, aln_task_multi, aln_task_single],
                      ['run'])

            alns_single = MafParser(out_single).read()
            alns_multi = MafParser(out_multi).read()

            # Compare the sorted values positionally. sort_values() keeps the
            # original row labels, and `Series == Series` refuses to compare
            # Series whose indexes are not identical, so comparing the Series
            # directly breaks as soon as the two runs emit rows in a
            # different order.
            evalues_single = alns_single['E'].sort_values().tolist()
            evalues_multi = alns_multi['E'].sort_values().tolist()
            assert evalues_single == evalues_multi
def test_crbl(tmpdir, datadir):
    '''Run the `shmlast crbl` script with an explicit output path and check
    that it exits with status 0 and that the output file exists.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')
    out_path = tmpdir.join('test.csv')
    out_fn = str(out_path)

    args = ['crbl', '-q', query, '-d', database, '-o', out_fn]
    status, out, err = runscript('shmlast', args, directory=str(tmpdir))

    assert status == 0
    # check() only inspects the filesystem. ensure() would create a missing
    # file itself, which made the original assertion impossible to fail.
    assert out_path.check(file=True)
def test_reciprocal_best_hits(datadir):
    '''Test BestHits.reciprocal_best_hits with inplace=False against the
    expected reciprocals table.
    '''
    queries = pd.read_csv(datadir('query.maf.csv'))
    targets = pd.read_csv(datadir('db.maf.csv'))
    expected = pd.read_csv(datadir('reciprocals.expected.csv'))

    observed = BestHits().reciprocal_best_hits(queries, targets,
                                               inplace=False)

    assert check_df_equals(observed, expected)
def test_rbl(tmpdir, datadir):
    '''Run the `shmlast rbl` script without an explicit output path and check
    that it exits with status 0 and leaves the expected results file in the
    working directory.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')
    print(query, database, tmpdir)

    args = ['rbl', '-q', query, '-d', database]
    status, out, err = runscript('shmlast', args, directory=str(tmpdir))

    assert status == 0
    # check() only inspects the filesystem. ensure() would create a missing
    # file itself, which made the original assertion impossible to fail.
    results = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert results.check(file=True)
def test_reciprocal_best_hits(datadir):
    '''Test BestHits.reciprocal_best_hits with inplace=False against the
    expected reciprocals table.
    '''
    queries = pd.read_csv(datadir('query.maf.csv'))
    targets = pd.read_csv(datadir('db.maf.csv'))
    expected = pd.read_csv(datadir('reciprocals.expected.csv'))

    observed = BestHits().reciprocal_best_hits(queries, targets,
                                               inplace=False)

    assert check_df_equals(observed, expected)
def test_crbl_tasks_empty(tmpdir, datadir):
    '''Run every task produced by a CRBL instance built from pom.single.fa
    and odb_subset.fa, and expect run_tasks to return 0.
    '''
    with tmpdir.as_cwd():
        transcript_fn = datadir('pom.single.fa')
        protein_fn = datadir('odb_subset.fa')
        csv_fn = tmpdir.join('result.csv').strpath

        pipeline = CRBL(transcript_fn, protein_fn, csv_fn)
        exit_code = run_tasks(list(pipeline.tasks()), ['run'])

        assert exit_code == 0
def test_besthits_non_inplace(datadir):
    '''Test BestHits.best_hits with inplace=False against the expected
    best-hits table.
    '''
    alignments = pd.read_csv(datadir('query.maf.csv'))
    expected = pd.read_csv(datadir('besthits.expected.csv'))

    observed = BestHits().best_hits(alignments, inplace=False)

    assert check_df_equals(expected, observed)
def test_besthits_non_inplace(datadir):
    '''Test BestHits.best_hits with inplace=False against the expected
    best-hits table.
    '''
    alignments = pd.read_csv(datadir('query.maf.csv'))
    expected = pd.read_csv(datadir('besthits.expected.csv'))

    observed = BestHits().best_hits(alignments, inplace=False)

    assert check_df_equals(expected, observed)
def test_crbl_tasks_empty(tmpdir, datadir):
    '''Run every task produced by a CRBL instance built from pom.single.fa
    and odb_subset.fa, and expect run_tasks to return 0.
    '''
    with tmpdir.as_cwd():
        transcript_fn = datadir('pom.single.fa')
        protein_fn = datadir('odb_subset.fa')
        csv_fn = tmpdir.join('result.csv').strpath

        pipeline = CRBL(transcript_fn, protein_fn, csv_fn)
        exit_code = run_tasks(list(pipeline.tasks()), ['run'])

        assert exit_code == 0
def test_rbl(tmpdir, datadir, n_threads, benchmark):
    '''Benchmark one run of the `shmlast rbl` script with the given thread
    count and check that it exits with status 0 and leaves the expected
    results file in the working directory.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')

    args = ['rbl', '--n_threads', str(n_threads),
            '-q', query, '-d', database]
    # A single round with a single iteration: the script is run exactly once.
    status, out, err = benchmark.pedantic(runscript,
                                          args=('shmlast', args),
                                          kwargs={'directory': str(tmpdir)},
                                          iterations=1,
                                          rounds=1)

    assert status == 0
    # check() only inspects the filesystem. ensure() would create a missing
    # file itself, which made the original assertion impossible to fail.
    results = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert results.check(file=True)
def test_rbl(tmpdir, datadir, n_threads, benchmark):
    '''Benchmark one run of the `shmlast rbl` script with the given thread
    count and check that it exits with status 0 and leaves the expected
    results file in the working directory.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')

    args = ['rbl', '--n_threads', str(n_threads),
            '-q', query, '-d', database]
    # A single round with a single iteration: the script is run exactly once.
    status, out, err = benchmark.pedantic(runscript,
                                          args=('shmlast', args),
                                          kwargs={'directory': str(tmpdir)},
                                          iterations=1,
                                          rounds=1)

    assert status == 0
    # check() only inspects the filesystem. ensure() would create a missing
    # file itself, which made the original assertion impossible to fail.
    results = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert results.check(file=True)
def test_lastal_task_nucl_x_prot(tmpdir, datadir):
    '''Align a transcript against a protein database with translate=True and
    check that the output names both sequences and contains 'lambda'.
    '''
    with tmpdir.as_cwd():
        prot = datadir('test-protein.fa')
        tr = datadir('test-transcript.fa')
        out = tmpdir.join('test-out').strpath

        db_task = lastdb_task(prot, prot)
        aln_task = lastal_task(tr, prot, out, translate=True, cutoff=None)
        run_tasks([db_task, aln_task], ['run'])

        # Read through a context manager so the handle is closed promptly.
        with open(out) as fp:
            aln = fp.read()
        print(aln, file=sys.stderr)

        assert 'SPAC212_RecQ_type_DNA_helicase_PROTEIN' in aln
        assert 'SPAC212_RecQ_type_DNA_helicase_TRANSCRIPT' in aln
        assert 'lambda' in aln, 'lambda missing, wrong LAST version?'
def lastdb_dir(tmpdir_factory, datadir):
    '''Run lastdb_task (prot=True) on sacPom.pep.fa inside a fresh
    'sacpom_lastdb' temporary directory and return that directory.
    '''
    workdir = tmpdir_factory.mktemp('sacpom_lastdb')
    with workdir.as_cwd():
        pep_fn = datadir('sacPom.pep.fa')
        build_db = lastdb_task(pep_fn, pep_fn, prot=True)
        exit_code = run_tasks([build_db], ['run'])
        assert exit_code == 0
    return workdir
def test_lastdb_task_existing(tmpdir, datadir):
    '''Touch a file for every LASTDB extension before running the database
    task, then expect the task to be reported as up-to-date.
    '''
    with tmpdir.as_cwd():
        fasta_fn = datadir('test-protein.fa')
        for suffix in LASTDB_EXTENSIONS:
            touch(fasta_fn + suffix)

        db_task = lastdb_task(fasta_fn, fasta_fn, prot=True)
        run_tasks([db_task], ['run'])
        print(db_task, file=sys.stderr)

        report = check_status(db_task)
        assert report.status == 'up-to-date'
def test_lastdb_task_prot(tmpdir, datadir):
    '''Run the database task with prot=True and expect a file for every
    LASTDB extension plus an up-to-date task status.
    '''
    with tmpdir.as_cwd():
        fasta_fn = datadir('test-protein.fa')
        db_task = lastdb_task(fasta_fn, fasta_fn, prot=True)
        run_tasks([db_task], ['run'])
        report = check_status(db_task)

        # Every extension must have a matching file next to the input.
        missing = [suffix for suffix in LASTDB_EXTENSIONS
                   if not os.path.isfile(fasta_fn + suffix)]
        assert not missing
        assert report.status == 'up-to-date'
def test_lastdb_task_nucl(tmpdir, datadir):
    '''Run the database task with prot=False and expect a file for every
    LASTDB extension plus an up-to-date task status.
    '''
    with tmpdir.as_cwd():
        fasta_fn = datadir('test-transcript.fa')
        db_task = lastdb_task(fasta_fn, fasta_fn, prot=False)
        run_tasks([db_task], ['run'])
        report = check_status(db_task)

        # Diagnostic output: the PATH this test ran with.
        print('PATH:', os.environ['PATH'], file=sys.stderr)

        # Every extension must have a matching file next to the input.
        missing = [suffix for suffix in LASTDB_EXTENSIONS
                   if not os.path.isfile(fasta_fn + suffix)]
        assert not missing
        assert report.status == 'up-to-date'
def test_lastal_task_large(datadir, lastdb_dir, tmpdir_factory, benchmark,
                           n_threads):
    '''Benchmark one run of the alignment task for sacPom.cdna.fa against the
    database found in `lastdb_dir`, and expect run_tasks to return 0.
    '''
    workdir = tmpdir_factory.mktemp('THREADS_{0}'.format(n_threads))
    with workdir.as_cwd():
        cdna_fn = datadir('sacPom.cdna.fa')
        db_prefix = str(lastdb_dir.join('sacPom.pep.fa'))

        align = lastal_task(cdna_fn, db_prefix, 'out',
                            translate=True,
                            cutoff=None,
                            n_threads=n_threads)
        # A single round with a single iteration: the task runs exactly once.
        exit_code = benchmark.pedantic(run_tasks,
                                       args=([align], ['run']),
                                       iterations=1,
                                       rounds=1)
        assert exit_code == 0
def test_lastal_task_uptodate(tmpdir, datadir):
    '''Run the database and alignment tasks once, then expect the alignment
    task to be reported as up-to-date.
    '''
    with tmpdir.as_cwd():
        protein_fn = datadir('test-protein.fa')
        aln_fn = tmpdir.join('test-out').strpath

        build_db = lastdb_task(protein_fn, protein_fn)
        align = lastal_task(protein_fn, protein_fn, aln_fn,
                            translate=False, cutoff=None)

        # First (and only) execution of both tasks.
        run_tasks([build_db, align], ['run'])

        # The tasks are not run a second time; only the status is queried.
        print(align)
        report = check_status(align, tasks=[align, build_db])
        assert report.status == 'up-to-date'