def setUp(self):
    """Prepare one blastp case and a ``DiamondCache`` seeded from 'cache.faa'.

    ``self.blast`` is ``(aligner, query path, expected result path)``.
    """
    super().setUp()

    aligner, query_name = 'blastp', 'WP_009885814.faa'
    query_fp = get_data_path(query_name)
    expected_fp = _get_named_data_path('%s.diamond' % query_name)
    self.blast = (aligner, query_fp, expected_fp)

    # The cache is built from every record read from the FASTA file.
    records = skbio.read(_get_named_data_path('cache.faa'), format='fasta')
    self.cache = DiamondCache(list(records))
def setUp(self):
    """Create a scratch directory and resolve observed/expected file paths.

    "Observed" paths live in the temporary directory; "expected" paths are
    resolved through ``_get_named_data_path``.
    """
    self.tmp_dir = mkdtemp()

    db_name = 'tigrfam_v15.0.db'
    pressed_name = 'tigrfam_v15.0.hmm.h3f'

    # Database file.
    self.db_fp = db_name
    self.exp_db_fp = _get_named_data_path(db_name)
    self.obs_db_fp = join(self.tmp_dir, db_name)
    # Directory that holds the expected database.
    self.d = dirname(self.exp_db_fp)

    # Pressed HMM file.
    self.pressed_fp = pressed_name
    self.exp_pressed_fp = _get_named_data_path(pressed_name)
    self.obs_pressed_fp = join(self.tmp_dir, pressed_name)
def setUp(self):
    """Build the input paths, parameter sets and expected parse results.

    The three ``positive_*`` lists are parallel: entry *n* of each one
    describes the same run.
    """
    self.tmp_dir = mkdtemp()

    # modified from NC_018498.gbk
    gbk_names = ['NC_018498_partial_1.gbk',
                 'NC_018498_partial_1.gbk',
                 'NC_018498_partial_2.gbk']
    self.positive_fps = [_get_named_data_path(name) for name in gbk_names]

    meta = {'-p': 'meta'}
    meta_gff = {'-p': 'meta', '-f': 'gff'}
    single = {'-p': 'single'}
    self.positive_params = [meta, meta_gff, single]

    gbk_suffices = {'-o': 'gbk', '-a': 'faa', '-d': 'fna'}
    gff_suffices = {'-o': 'gff', '-a': 'faa', '-d': 'fna'}
    self.positive_suffices = [gbk_suffices, gff_suffices, dict(gbk_suffices)]

    self.positive_outdir = ['test_%d' % n for n in (1, 2, 3)]

    self.negative_fps = [get_data_path(name)
                         for name in ('empty', 'whitespace_only')]

    self.parse_fp = _get_named_data_path('parse_test.faa')

    # Expected parse result: one dict per record, mapping each feature to
    # its list of (start, end) intervals.
    cds_1_1 = Feature(
        type_='CDS', id='1_1', right_partial_=False, left_partial_=False,
        location='686..1828',
        translation='MKILINKSELNKILKKMNNVIISNNKIKPHHSYFLIEAKEKEINFYANNEYFSVKCNLNKYFLITSKSEPELKQILVPSR*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.236"',
        rc_=False)
    cds_1_2 = Feature(
        type_='CDS', id='1_2', location='1828..>2757',
        translation='MNLYDLLELPTTASIKEIKIAYKRLAKRYHPDVNKLGSQTFVEINNAYSILSDPNQKEKYFNYKTQHFID',
        right_partial_=True, left_partial_=False,
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.271"',
        rc_=False)
    cds_2_1 = Feature(
        type_='CDS', id='2_1', location='21577..22128',
        right_partial_=False, left_partial_=False,
        translation='MKKTSPFILRRTKNKVLKELPKKIITDIYVELSEEHQKLYDKQKTDGLKEIKESDAKNALFDV*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.272"',
        rc_=False)

    first_record = {cds_1_1: [(685, 1828)], cds_1_2: [(1827, 2757)]}
    second_record = {cds_2_1: [(21576, 22128)]}
    self.parse_exp = [first_record, second_record]
def setUp(self):
    """Prepare the aligner test cases and a ``DiamondCache`` from 'cache.faa'.

    Each entry of ``self.tests`` is a ``Test(aligner, input, exp)`` tuple.
    """
    super().setUp()

    Test = namedtuple('Test', ['aligner', 'input', 'exp'])
    self.tests = []
    for aligner, query_name in (('blastp', 'WP_009885814.faa'),
                                ('blastx', 'WP_009885814.fna')):
        query_fp = get_data_path(query_name)
        expected_fp = _get_named_data_path('%s.diamond' % query_name)
        self.tests.append(Test(aligner, query_fp, expected_fp))

    records = skbio.read(_get_named_data_path('cache.faa'), format='fasta')
    self.cache = DiamondCache(list(records))
def setUp(self):
    """Build one ``Case`` per query with its observed and expected files.

    ``Files`` has one field per output suffix; observed files are placed in
    ``self.tmp_dir`` and expected files come from the named data directory.
    """
    super().setUp()

    suffices = ['tblout']
    Files = namedtuple('Files', suffices)
    Case = namedtuple('Case', ['query', 'obs', 'exp'])

    def build_case(query_name):
        # Output file names are "<query>.<suffix>".
        out_names = ['{}.{}'.format(query_name, suffix)
                     for suffix in suffices]
        observed = Files(*[join(self.tmp_dir, n) for n in out_names])
        expected = Files(*[_get_named_data_path(n) for n in out_names])
        return Case(_get_named_data_path(query_name), observed, expected)

    self.cases = [build_case(name) for name in ['Pfam_B_1.faa']]
def setUp(self):
    """Prepare the aligner test cases and a ``DiamondCache`` from 'cache.faa'.

    ``self.tests`` holds ``Test(aligner, input, exp)`` tuples, one per
    (aligner, query file) pair.
    """
    super().setUp()

    Test = namedtuple('Test', ['aligner', 'input', 'exp'])
    pairs = [('blastp', 'WP_009885814.faa'),
             ('blastx', 'WP_009885814.fna')]
    self.tests = [
        Test(aligner,
             get_data_path(fname),
             _get_named_data_path('%s.diamond' % fname))
        for aligner, fname in pairs
    ]

    cache_fp = _get_named_data_path('cache.faa')
    self.cache = DiamondCache(list(skbio.read(cache_fp, format='fasta')))
def setUp(self):
    """Define the file to parse and the features expected from it.

    ``self.parse_exp`` has one dict per record, mapping each ``Feature`` to
    its list of (start, end) intervals.
    """
    self.parse_fp = _get_named_data_path('parse_test.faa')

    cds_1_1 = Feature(
        type_='CDS', id='1_1', right_partial_=False, left_partial_=False,
        location='686..1828',
        translation='MKILINKSELNKILKKMNNVIISNNKIKPHHSYFLIEAKEKEINFYANNEYFSVKCNLNKYFLITSKSEPELKQILVPSR*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.236"',
        rc_=False)
    cds_1_2 = Feature(
        type_='CDS', id='1_2', location='1828..>2757',
        translation='MNLYDLLELPTTASIKEIKIAYKRLAKRYHPDVNKLGSQTFVEINNAYSILSDPNQKEKYFNYKTQHFID',
        right_partial_=True, left_partial_=False,
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.271"',
        rc_=False)
    cds_2_1 = Feature(
        type_='CDS', id='2_1', location='21577..22128',
        right_partial_=False, left_partial_=False,
        translation='MKKTSPFILRRTKNKVLKELPKKIITDIYVELSEEHQKLYDKQKTDGLKEIKESDAKNALFDV*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.272"',
        rc_=False)

    first_record = {cds_1_1: [(685, 1828)], cds_1_2: [(1827, 2757)]}
    second_record = {cds_2_1: [(21576, 22128)]}
    self.parse_exp = [first_record, second_record]
def setUp(self):
    """Create a scratch directory and the positive/negative test inputs.

    ``self.cases`` holds ``Case(query, kwargs, outdir)`` tuples built from
    three parallel lists.
    """
    self.tmp_dir = mkdtemp()
    self.negative_fps = [get_data_path(name)
                         for name in ('empty', 'whitespace_only')]

    Case = namedtuple('Case', ['query', 'kwargs', 'outdir'])

    # modified from NC_018498.gbk
    query_names = ['NC_018498_partial_1.gbk',
                   'NC_018498_partial_1.gbk',
                   'NC_018498_partial_2.gbk']
    queries = [_get_named_data_path(name) for name in query_names]
    options = [{'-p': 'meta', '-f': 'gbk'},
               {'-p': 'meta'},
               {'-p': 'single'}]
    outdirs = ['test_1', 'test_2', 'test_3']

    self.cases = [Case(*row) for row in zip(queries, options, outdirs)]
def test_run(self):
    """Run with several option sets and compare each output to its reference.

    The reference file for a run is the query path with its extension
    replaced by the suffix paired with that option set.
    """
    # taken from MinCED test files
    fn = 'Aquifex_aeolicus_VF5.fna'
    query = _get_named_data_path(fn)
    exp = splitext(query)[0]
    params = [
        {'-searchWL': '8', 'gff': True, 'gffFull': False, 'spacers': False},
        {'-searchWL': '8', '-minNR': '3', 'gffFull': True, 'gff': False,
         'spacers': False},
        {'gff': True, 'spacers': True, 'gffFull': False},
    ]
    exp_fps = ['.gff', '.gffFull', '.gff']
    for param, s in zip(params, exp_fps):
        res = run(query, self.tmp_dir, **param)
        # shallow=False forces a content comparison. With the default
        # (shallow=True) two files whose stat signatures match are reported
        # equal without being read. The other file comparisons in this
        # module already pass shallow=False.
        self.assertTrue(
            cmp(res.params['out'].value, exp + s, shallow=False))
def setUp(self):
    """Define the file to parse and the features expected from it.

    ``self.parse_exp`` is a list with one dict per record; keys are
    ``Feature`` objects and values are lists of (start, end) intervals.
    """
    self.parse_fp = _get_named_data_path('parse_test.faa')

    # Features expected in the first record.
    feat_1_1 = Feature(
        type_='CDS', id='1_1', right_partial_=False, left_partial_=False,
        location='686..1828',
        translation='MKILINKSELNKILKKMNNVIISNNKIKPHHSYFLIEAKEKEINFYANNEYFSVKCNLNKYFLITSKSEPELKQILVPSR*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.236"',
        rc_=False)
    feat_1_2 = Feature(
        type_='CDS', id='1_2', location='1828..>2757',
        translation='MNLYDLLELPTTASIKEIKIAYKRLAKRYHPDVNKLGSQTFVEINNAYSILSDPNQKEKYFNYKTQHFID',
        right_partial_=True, left_partial_=False,
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.271"',
        rc_=False)
    # Feature expected in the second record.
    feat_2_1 = Feature(
        type_='CDS', id='2_1', location='21577..22128',
        right_partial_=False, left_partial_=False,
        translation='MKKTSPFILRRTKNKVLKELPKKIITDIYVELSEEHQKLYDKQKTDGLKEIKESDAKNALFDV*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.272"',
        rc_=False)

    self.parse_exp = [
        {feat_1_1: [(685, 1828)], feat_1_2: [(1827, 2757)]},
        {feat_2_1: [(21576, 22128)]},
    ]
def setUp(self):
    """Prepare ``self.blast``: (aligner, query path, expected path) tuples."""
    super().setUp()

    self.blast = []
    for aligner, fname in [('blastp', 'WP_009885814.faa'),
                           ('blastx', 'WP_009885814.fna')]:
        query_fp = get_data_path(fname)
        expected_fp = _get_named_data_path('%s.diamond' % fname)
        self.blast.append((aligner, query_fp, expected_fp))
def test_run(self):
    """Run every case and compare its output directory with the reference.

    Only files present in the expected directory are checked; each must be
    identical in content to the file of the same name that was produced.
    """
    for case in self.cases:
        expected_dir = _get_named_data_path(case.outdir)
        observed_dir = join(self.tmp_dir, case.outdir)
        run(case.query, observed_dir, **case.kwargs)

        for name in listdir(expected_dir):
            produced = join(observed_dir, name)
            reference = join(expected_dir, name)
            identical = cmp(produced, reference, shallow=False)
            self.assertTrue(identical)
def setUp(self):
    """Prepare ``self.tests`` as ``Test(aligner, input, exp)`` tuples.

    ``input`` comes from ``get_data_path`` and ``exp`` from
    ``_get_named_data_path``, both for the same file name.
    """
    super().setUp()

    Test = namedtuple('Test', ['aligner', 'input', 'exp'])
    self.tests = []
    for aligner, fname in (('blastp', 'WP_009885814.faa'),
                           ('blastx', 'WP_009885814.fna')):
        self.tests.append(
            Test(aligner, get_data_path(fname), _get_named_data_path(fname)))
def setUp(self):
    """Prepare ``self.tests`` as ``Test(aligner, input, exp)`` tuples.

    The same file name is resolved twice: through ``get_data_path`` for the
    input and through ``_get_named_data_path`` for the expected result.
    """
    super().setUp()

    Test = namedtuple('Test', ['aligner', 'input', 'exp'])
    pairs = [('blastp', 'WP_009885814.faa'),
             ('blastx', 'WP_009885814.fna')]
    self.tests = [
        Test(aligner, get_data_path(fname), _get_named_data_path(fname))
        for aligner, fname in pairs
    ]
def _test_eq(self):
    """Compare generated files in ``self.tmp_dir`` with the expected files.

    For every candidate name that has an expected file, the observed and
    expected files are read as text and must be equal; the observed file is
    then removed. Candidates without an expected file are skipped.
    """
    for base in self.uniref_res:
        name = base
        for suffix in ['fasta', 'dmnd']:
            # NOTE(review): the suffix is appended to the name produced by
            # the previous pass, so the candidates are '<base>.fasta' and
            # then '<base>.fasta.dmnd' (not '<base>.dmnd'). Confirm that
            # this accumulation is intended.
            name = '.'.join([name, suffix])
            obs = join(self.tmp_dir, name)
            exp = _get_named_data_path(name)
            if not exists(exp):
                continue
            with open(obs) as observed, open(exp) as expected:
                self.assertEqual(observed.read(), expected.read())
            remove(obs)
def setUp(self):
    """Create a scratch directory, database paths and the alignment cases.

    Each ``Case`` carries the aligner name, the query path and two ``Files``
    tuples (observed in ``self.tmp_dir``, expected from the data directory)
    with one field per output suffix.
    """
    self.tmp_dir = mkdtemp()

    self.faa = _get_named_data_path('db.faa')
    self.exp_db = _get_named_data_path('db.dmnd')
    self.obs_db = join(self.tmp_dir, 'db.dmnd')

    suffices = ['daa', 'tab', 'sam', 'sam_df', 'sam_best',
                'sam_io5', 'sam_io12']
    Files = namedtuple('Files', suffices)
    Case = namedtuple('Case', ['aligner', 'query', 'obs', 'exp'])

    def build_case(aligner, query_name):
        # Output file names are "<query>.<suffix>".
        out_names = ['{}.{}'.format(query_name, suffix)
                     for suffix in suffices]
        observed = Files(*[join(self.tmp_dir, n) for n in out_names])
        expected = Files(*[_get_named_data_path(n) for n in out_names])
        return Case(aligner, _get_named_data_path(query_name),
                    observed, expected)

    # The ('blastx', 'test.fna') case is currently disabled.
    enabled = [('blastp', 'test.faa')]
    self.cases = [build_case(aligner, name) for aligner, name in enabled]

    self.neg_fp = [get_data_path(name)
                   for name in ('empty', 'whitespace_only')]
def test_pred_run(self):
    """Run the predictor on each positive input and check its output files.

    For every (input, parameters, output directory) triple the exit status
    must be 0, and every file in the expected directory must be identical in
    content to the file of the same name that was produced.
    """
    for fp, params, outdir in zip(self.positive_fps,
                                  self.positive_params,
                                  self.positive_outdir):
        exp_d = _get_named_data_path(outdir)
        obs_d = join(self.tmp_dir, outdir)
        pred = FeaturePred(None, obs_d)
        res = pred.run(fp, params)
        try:
            self.assertEqual(res['ExitStatus'], 0)
            for f in listdir(exp_d):
                self.assertTrue(
                    cmp(join(obs_d, f), join(exp_d, f), shallow=False))
        finally:
            # Close the captured streams even when an assertion fails, so a
            # failing run does not leak open file handles.
            res['StdOut'].close()
            res['StdErr'].close()
def setUp(self):
    """Create a scratch directory, database paths and the alignment cases.

    ``self.cases`` holds ``Case(aligner, query, obs, exp)`` tuples, where
    ``obs`` and ``exp`` are ``Files`` tuples with one path per output suffix.
    """
    self.tmp_dir = mkdtemp()

    db_name = 'db.dmnd'
    self.faa = _get_named_data_path('db.faa')
    self.obs_db = join(self.tmp_dir, db_name)
    self.exp_db = _get_named_data_path(db_name)

    suffices = ['daa', 'tab', 'sam', 'sam_df', 'sam_best',
                'sam_io5', 'sam_io12']
    Files = namedtuple('Files', suffices)
    Case = namedtuple('Case', ['aligner', 'query', 'obs', 'exp'])

    # The ('blastx', 'test.fna') case is currently disabled.
    enabled = [('blastp', 'test.faa')]

    self.cases = []
    for aligner, query_name in enabled:
        observed_paths = []
        expected_paths = []
        for suffix in suffices:
            out_name = '{}.{}'.format(query_name, suffix)
            observed_paths.append(join(self.tmp_dir, out_name))
            expected_paths.append(_get_named_data_path(out_name))
        self.cases.append(
            Case(aligner,
                 _get_named_data_path(query_name),
                 Files(*observed_paths),
                 Files(*expected_paths)))

    self.neg_fp = [get_data_path(name)
                   for name in ('empty', 'whitespace_only')]
def test_run(self):
    """Run with several option sets and compare each output to its reference.

    The reference file for a run is the query path with its extension
    replaced by the suffix paired with that option set.
    """
    # taken from MinCED test files
    fn = 'Aquifex_aeolicus_VF5.fna'
    query = _get_named_data_path(fn)
    exp = splitext(query)[0]
    params = [
        {'-searchWL': '8', 'gff': True, 'gffFull': False, 'spacers': False},
        {'-searchWL': '8', '-minNR': '3', 'gffFull': True, 'gff': False,
         'spacers': False},
        {'gff': True, 'spacers': True, 'gffFull': False}]
    exp_fps = ['.gff', '.gffFull', '.gff']
    for param, s in zip(params, exp_fps):
        res = run(query, self.tmp_dir, **param)
        # Compare file contents rather than stat signatures: with the
        # default shallow=True, files with matching signatures are treated
        # as equal without being read. Other comparisons in this module
        # already pass shallow=False.
        self.assertTrue(
            cmp(res.params['out'].value, exp + s, shallow=False))
def setUp(self):
    """Create a scratch directory and resolve the database fixture paths.

    ``self.uniref_res`` lists the base names that ``'Swiss-Prot'`` and
    ``'TrEMBL'`` produce for each group, followed by ``'_other'``.
    """
    self.tmp_dir = mkdtemp()

    db_name = 'uniprotkb.db'
    self.db_fp = db_name
    self.exp_db_fp = _get_named_data_path(db_name)
    self.obs_db_fp = join(self.tmp_dir, db_name)
    # Directory that holds the expected database.
    self.d = dirname(self.exp_db_fp)

    sprot_fp = _get_named_data_path('uniprot_sprot.xml.gz')
    trembl_fp = _get_named_data_path('uniprot_trembl.xml.gz')
    self.uniprotkb = [sprot_fp, trembl_fp, 12]

    self.uniref_fp = _get_named_data_path('uniref100.fasta.gz')

    groups = ('Archaea', 'Bacteria', 'Viruses', 'Eukaryota', 'other')
    names = []
    for source in ('Swiss-Prot', 'TrEMBL'):
        for group in groups:
            names.append(source + '_' + group)
    names.append('_other')
    self.uniref_res = names
def setUp(self):
    """Create a scratch directory and the positive/negative test inputs.

    Every ``Case(query, kwargs, outdir)`` pairs an input file with the
    keyword options for the run and the name of its output directory.
    """
    self.tmp_dir = mkdtemp()

    self.negative_fps = []
    for name in ['empty', 'whitespace_only']:
        self.negative_fps.append(get_data_path(name))

    Case = namedtuple('Case', ['query', 'kwargs', 'outdir'])

    # modified from NC_018498.gbk
    queries = [_get_named_data_path(name)
               for name in ('NC_018498_partial_1.gbk',
                            'NC_018498_partial_1.gbk',
                            'NC_018498_partial_2.gbk')]
    options = [{'-p': 'meta', '-f': 'gbk'}, {'-p': 'meta'}, {'-p': 'single'}]
    outdirs = ['test_1', 'test_2', 'test_3']

    self.cases = [Case(query, kwargs, outdir)
                  for query, kwargs, outdir in zip(queries, options, outdirs)]
def setUp(self):
    """Prepare the SAM test inputs and the expected result table.

    ``self.blast`` holds (aligner, query path, '<query>.sam' path) tuples.
    ``self.exp`` is a DataFrame with columns ``sseqid``, ``evalue``,
    ``bitscore`` and ``sequence`` (three rows).
    """
    pairs = [('blastp', 'WP_009885814.faa'),
             ('blastx', 'WP_009885814.fna')]
    self.blast = [
        (aligner, get_data_path(fname), _get_named_data_path('%s.sam' % fname))
        for aligner, fname in pairs
    ]

    hit_ids = ['UniRef100_P47599', 'UniRef100_B2HPZ3', 'UniRef100_A4T166']
    evalues = [2.1e-229, 2.9e-58, 3.3e-57]
    bitscores = [2009, 533, 524]

    # Each sequence is written as adjacent literals that are joined at
    # compile time.
    first_seq = (
        'MQSHKILVVNAGSSSIKFQLFNDKKQVLAKGLCERIFIDGFFKLEFNQK'
        'KIEEKVQFNDHNLAVKHFLNALKKNKIITELSEIGLIGHRVVQGANYFT'
        'DAVLVDTHSLAKIKEFIKLAPLHNKPEADVIEIFLKEIKTAKNVAVFDT'
        'TFHTTIPRENYLYAVPENXEKNNLVRRYGFHGTSYKYINEFLEKKFNKK'
        'PLNLIVCHLGNGASVCAIKQGKSLNTSMGFTPLEGLIMGTRSGDIDPAI'
        'VSYIAEQQKLSCNDVVNELNKKSGMFAITGSSDMRDIFDKPEINDIAIK'
        'MYVNRVADYIAKYLNQLSGEIDSLVFTGGVGENASYCVQLIIEKVASLG'
        'FKTNSNLFGNYQDSSLISTNESKYQIFRVRTNEELMIVEDALRVSTNIK'
        'K')
    second_seq = (
        'ILVVNAGSSSIKFQLFNDKKQVLAKGLCERIFIDGFFKLEFNQKKIEEK'
        'VQFNDHNLAVKHFLNALKKNKIITELSEIGLIGHRVVQGANYFTDAVLV'
        'DTHSLAKIKEFIKLAPLHNKPEADVIEIFLKEIKTAKNVAVFDTTFHTT'
        'IPRENYLYAVPENXEKNNLVRRYGFHGTSYKYINEFLEKKFNKKPLNLI'
        'VCHLGNGASVCAIKQGKSLNTSMGFTPLEGLIMGTRSGDIDPAIVSYIA'
        'EQQKLSCNDVVNELNKKSGMFAITGSSDMRDIFDKPEINDIAIKMYVNR'
        'VADYIAKYLNQLSGEIDSLVFTGGVGENASYCVQLIIEKVASLGFKTNS'
        'NLFGNYQDSSLISTNESKYQIFRVRTNEELMIVEDALRV')
    third_seq = (
        'ILVVNAGSSSIKFQLFNDKKQVLAKGLCERIFIDGFFKLEFNQKKIEEK'
        'VQFNDHNLAVKHFLNALKKNKIITELSEIGLIGHRVVQGANYFTDAVLV'
        'DTHSLAKIKEFIKLAPLHNKPEADVIEIFLKEIKTAKNVAVFDTTFHTT'
        'IPRENYLYAVPENXEKNNLVRRYGFHGTSYKYINEFLEKKFNKKPLNLI'
        'VCHLGNGASVCAIKQGKSLNTSMGFTPLEGLIMGTRSGDIDPAIVSYIA'
        'EQQKLSCNDVVNELNKKSGMFAITGSSDMRDIFDKPEINDIAIKMYVNR'
        'VADYIAKYLNQLSGEIDSLVFTGGVGENASYCVQLIIEKVASLGFKTNS'
        'NLFGNYQDSSLISTNESKYQIFRVRTNEELMI')

    self.exp = pd.DataFrame({
        'sseqid': hit_ids,
        'evalue': evalues,
        'bitscore': bitscores,
        'sequence': [first_seq, second_seq, third_seq],
    })
def setUp(self):
    """Create a scratch directory and the three groups of test cases.

    Every group is a list of ``Test(input, exp, obs)`` tuples, one per query
    file: ``input`` and ``exp`` come from the named data directory and
    ``obs`` is the output path inside ``self.tmp_dir``.
    """
    self.tmp_dir = mkdtemp()
    cases = ['WP_009885814.fna', 'WP_009885814.faa']
    Test = namedtuple('Test', ['input', 'exp', 'obs'])
    self.sam_tests = [Test(_get_named_data_path('%s.sam' % i),
                           _get_named_data_path('%s.txt' % i),
                           join(self.tmp_dir, '%s.txt' % i))
                      for i in cases]
    # The observed path must be formatted with the case name. Previously the
    # literal '%s.best' / '%s.idcov' was used as the file name, so every case
    # wrote to the same path.
    self.filter_tests = [Test(_get_named_data_path('%s.diamond' % i),
                              _get_named_data_path('%s.best' % i),
                              join(self.tmp_dir, '%s.best' % i))
                         for i in cases]
    self.filter_tests2 = [Test(_get_named_data_path('%s.sam' % i),
                               _get_named_data_path('%s.idcov' % i),
                               join(self.tmp_dir, '%s.idcov' % i))
                          for i in cases]
def setUp(self):
    """Create a scratch directory and the three groups of test cases.

    Every group is a list of ``Test(input, exp, obs)`` tuples, one per query
    file: ``input`` and ``exp`` come from the named data directory and
    ``obs`` is the output path inside ``self.tmp_dir``.
    """
    self.tmp_dir = mkdtemp()
    cases = ['WP_009885814.fna', 'WP_009885814.faa']
    Test = namedtuple('Test', ['input', 'exp', 'obs'])
    self.sam_tests = [
        Test(_get_named_data_path('%s.sam' % i),
             _get_named_data_path('%s.txt' % i),
             join(self.tmp_dir, '%s.txt' % i))
        for i in cases
    ]
    # Format the observed file name with the case name. Without "% i" the
    # literal '%s.best' / '%s.idcov' was used, so all cases shared one
    # output path.
    self.filter_tests = [
        Test(_get_named_data_path('%s.diamond' % i),
             _get_named_data_path('%s.best' % i),
             join(self.tmp_dir, '%s.best' % i))
        for i in cases
    ]
    self.filter_tests2 = [
        Test(_get_named_data_path('%s.sam' % i),
             _get_named_data_path('%s.idcov' % i),
             join(self.tmp_dir, '%s.idcov' % i))
        for i in cases
    ]
def setUp(self):
    """Create a scratch directory and resolve database and negative inputs."""
    self.tmp_dir = mkdtemp()

    # FASTA source and the database file of the same base name.
    self.db_fa = _get_named_data_path("db.faa")
    self.db = _get_named_data_path("db.dmnd")

    # Inputs named "empty" and "whitespace_only".
    self.neg_fp = []
    for name in ("empty", "whitespace_only"):
        self.neg_fp.append(get_data_path(name))
def setUp(self):
    """Create a scratch directory and resolve the HMM and negative inputs."""
    self.tmp_dir = mkdtemp()
    self.hmm_fp = _get_named_data_path('Pfam_B_1.hmm')

    # Inputs named 'empty' and 'whitespace_only'.
    negative_names = ('empty', 'whitespace_only')
    self.negative_fps = list(map(get_data_path, negative_names))
def setUp(self):
    """Prepare ``self.blast``: (aligner, query path, expected path) tuples."""
    super().setUp()

    def as_case(aligner, fname):
        # The expected result is stored as "<query file name>.diamond".
        return (aligner,
                get_data_path(fname),
                _get_named_data_path("%s.diamond" % fname))

    self.blast = [
        as_case("blastp", "WP_009885814.faa"),
        as_case("blastx", "WP_009885814.fna"),
    ]
def setUp(self):
    """Build the input paths, parameter sets and expected parse results.

    ``positive_fps``, ``positive_params``, ``positive_suffices`` and
    ``positive_outdir`` are parallel lists with three entries each.
    """
    self.tmp_dir = mkdtemp()

    # modified from NC_018498.gbk
    self.positive_fps = []
    for gbk_name in ('NC_018498_partial_1.gbk',
                     'NC_018498_partial_1.gbk',
                     'NC_018498_partial_2.gbk'):
        self.positive_fps.append(_get_named_data_path(gbk_name))

    self.positive_params = [
        {'-p': 'meta'},
        {'-p': 'meta', '-f': 'gff'},
        {'-p': 'single'},
    ]
    self.positive_suffices = [
        {'-o': out_format, '-a': 'faa', '-d': 'fna'}
        for out_format in ('gbk', 'gff', 'gbk')
    ]
    self.positive_outdir = ['test_1', 'test_2', 'test_3']

    self.negative_fps = list(map(get_data_path, ['empty', 'whitespace_only']))

    self.parse_fp = _get_named_data_path('parse_test.faa')

    # Expected parse result: one dict per record, mapping each feature to
    # its list of (start, end) intervals.
    rec1_first = Feature(
        type_='CDS', id='1_1', right_partial_=False, left_partial_=False,
        location='686..1828',
        translation='MKILINKSELNKILKKMNNVIISNNKIKPHHSYFLIEAKEKEINFYANNEYFSVKCNLNKYFLITSKSEPELKQILVPSR*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.236"',
        rc_=False)
    rec1_second = Feature(
        type_='CDS', id='1_2', location='1828..>2757',
        translation='MNLYDLLELPTTASIKEIKIAYKRLAKRYHPDVNKLGSQTFVEINNAYSILSDPNQKEKYFNYKTQHFID',
        right_partial_=True, left_partial_=False,
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.271"',
        rc_=False)
    rec2_first = Feature(
        type_='CDS', id='2_1', location='21577..22128',
        right_partial_=False, left_partial_=False,
        translation='MKKTSPFILRRTKNKVLKELPKKIITDIYVELSEEHQKLYDKQKTDGLKEIKESDAKNALFDV*',
        note='"start_type=ATG;rbs_motif=None;rbs_spacer=None;gc_cont=0.272"',
        rc_=False)

    self.parse_exp = [
        {rec1_first: [(685, 1828)], rec1_second: [(1827, 2757)]},
        {rec2_first: [(21576, 22128)]},
    ]
def setUp(self):
    """Create a scratch directory and resolve database and negative inputs."""
    self.tmp_dir = mkdtemp()

    # Both database files are looked up in the named data directory.
    fasta_fp, dmnd_fp = (_get_named_data_path(name)
                         for name in ('db.faa', 'db.dmnd'))
    self.db_fa = fasta_fp
    self.db = dmnd_fp

    # Inputs named 'empty' and 'whitespace_only'.
    self.neg_fp = list(map(get_data_path, ('empty', 'whitespace_only')))
def test_run(self):
    """Run every case against the expected database and check 'db.sam'.

    After each run, the 'db.sam' file in ``self.tmp_dir`` must be identical
    in content to the case's expected SAM file.
    """
    for case in self.cases:
        run(self.tmp_dir,
            case.query,
            self.exp_db,
            aligner=case.aligner,
            tmpdir=self.tmp_dir)

        produced = join(self.tmp_dir, 'db.sam')
        reference = _get_named_data_path(case.exp.sam)
        identical = cmp(produced, reference, shallow=False)
        self.assertTrue(identical)