def test_get_from_taxonomy_db(self):
    """EUtils access from taxonomy database should work.

    Requests the 'Brief' text report for two taxon ids and checks that the
    sorted response lines are exactly the two expected scientific names.
    """
    # note: this is more fragile than the nucleotide databases
    g = EUtils(db='taxonomy', rettype='Brief', retmode='text')
    ids = '9606[taxid] OR 28901[taxid]'
    result = sorted(g[ids].read().splitlines())
    # The expected names must be spelled out in full: the comparison is an
    # exact match against each line of the report.
    self.assertEqual(result, ['Homo sapiens', 'Salmonella enterica'])
def test_get_list(self):
    """EUtils access of a list should work.

    Looks up three protein accessions in a single request and expects one
    line starting with 'LOCUS' per accession in the response.
    """
    g = EUtils(db='protein', rettype='gp')
    result = g['NP_003320', 'NP_003321', 'NP_003322'].read()
    # A list comprehension always produces a list, so len() keeps working
    # on interpreters where filter() returns a lazy iterator.
    loci = [line for line in result.splitlines()
            if line.startswith('LOCUS')]
    self.assertEqual(len(loci), 3)
def test_get_slice(self):
    """EUtils access of a slice should work.

    Runs the same accession-range lookup twice -- once with the default
    settings and once with ``url_limit=2`` -- and expects three lines
    starting with 'LOCUS' in each response.
    """
    def count_loci(text):
        """Return how many lines of text start with 'LOCUS'."""
        # Counting with a generator avoids len() on a filter() result,
        # which fails where filter() returns a lazy iterator.
        return sum(1 for line in text.splitlines()
                   if line.startswith('LOCUS'))

    g = EUtils(db='protein', rettype='gp', retmax=1)
    result = g['NP_003320':'NP_003322'].read()
    self.assertEqual(count_loci(result), 3)

    # EUtils access of a slice should work, while limiting
    # the esearch term length
    g = EUtils(db='protein', rettype='gp', retmax=1, url_limit=2)
    result = g['NP_003320':'NP_003322'].read()
    self.assertEqual(count_loci(result), 3)
def test_query_max_recs_gt_retmax(self):
    """EUtils should stop query at max_recs when max_recs > retmax.

    Issues a query with max_recs=5 and retmax=3 and expects exactly five
    result lines back.
    """
    g = EUtils(db='protein', rettype='gi', max_recs=5, DEBUG=False,
               retmax=3)
    # The organism term has to be a real search term for the query to
    # return the records the assertion below counts.
    result = g['homo[organism] AND myh7'].read().splitlines()
    self.assertEqual(len(result), 5)
def test_get_from_taxonomy_db(self):
    """EUtils access from taxonomy database should work.

    Fetches the XML report for two taxon ids, parses it with
    parse_taxonomy_using_elementtree_xml_parse and checks that the sorted
    'ScientificName' values are exactly the two expected names.
    """
    # note: this is more fragile than the nucleotide databases
    g = EUtils(db='taxonomy', rettype='xml', retmode='xml')
    ids = '9606[taxid] OR 28901[taxid]'
    # The parser is handed a file-like object, so wrap the downloaded text
    # in an in-memory buffer positioned at its start.
    fh = StringIO(g[ids].read())
    data = parse_taxonomy_using_elementtree_xml_parse(fh)
    result = sorted(item['ScientificName'] for item in data)
    # The expected names must be spelled out in full: the comparison is an
    # exact match against the parsed values.
    self.assertEqual(result, ['Homo sapiens', 'Salmonella enterica'])
def test_parse_taxonomy_using_elementtree_xml_parse(self):
    """parse_taxonomy_using_elementtree_xml_parse should return taxonomy
    associated information.

    Fetches the XML report for a single taxon id, parses it, and compares
    four fields of the first parsed record against fixed expected values.
    """
    eutils = EUtils(db='taxonomy', rettype='xml', retmode='xml')
    xml_text = eutils['28901[taxid]'].read()
    # The parser receives a file-like object positioned at the start of
    # the downloaded text.
    record = parse_taxonomy_using_elementtree_xml_parse(StringIO(xml_text))[0]
    wanted = ('Lineage', 'TaxId', 'ScientificName', 'Rank')
    obs = tuple(record[key] for key in wanted)
    exp = (
        'cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales; Enterobacteriaceae; Salmonella',
        '28901',
        'Salmonella enterica',
        'species',
    )
    self.assertEqual(obs, exp)
def fetch_seq(gid, gb_fn):
    """Fetch a nucleotide record in 'gb' format and write it to a file.

    gid: key passed to the EUtils lookup (``e[gid]``).
    gb_fn: path of the output file; it is created or overwritten.
    """
    e = EUtils(db="nucleotide", rettype="gb")
    # Download before opening the output so a failed lookup does not leave
    # an empty or truncated file behind.
    record = e[gid].read()
    # The with-block closes the file even if the write raises.
    with open(gb_fn, 'w') as outfile:
        outfile.write(record)
def test_query_retmax(self):
    """EUtils should join results taken retmax at a time.

    Runs a query with retmax=3 and checks that more than one result line
    comes back and that a known gi is among them.
    """
    g = EUtils(db='protein', rettype='gi', retmax=3, DEBUG=False)
    result = g['homo[organism] AND myh7'].read().splitlines()
    # TestCase assertions are used instead of bare assert statements so
    # the checks still run when Python is started with -O.
    self.assertTrue(len(result) > 1)
    self.assertTrue('83304912' in result)  # gi of human myh7
def test_query(self):
    """EUtils access via a query should work.

    Runs a title-restricted query against the protein database and checks
    that a known gi appears among the returned lines.
    """
    g = EUtils(db='protein', rettype='gi', retmax=100)
    result = g['homo[organism] AND erf1[ti]'].read().splitlines()
    # A TestCase assertion is used instead of a bare assert statement so
    # the check still runs when Python is started with -O.
    self.assertTrue('5499721' in result)  # gi of human eRF1
def test_simple_get(self):
    """EUtils simple access of an item should work.

    Looks up a single protein accession and checks that the response
    starts with 'LOCUS' and mentions the requested accession.
    """
    g = EUtils(db='protein', rettype='gp')
    result = g['NP_003320'].read()
    # TestCase assertions are used instead of bare assert statements so
    # the checks still run when Python is started with -O.
    self.assertTrue(result.startswith('LOCUS'))
    self.assertTrue('NP_003320' in result)
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
#
# Queries the protein database for a gene/organism pair, parses the
# returned records, keeps the sequences that are at least 2800 long under
# short "<species tag>.<accession>" names, and prints one of them.
import re

from cogent import LoadSeqs
from cogent.db.ncbi import EUtils
from cogent.parse.genbank import RichGenbankParser

db = EUtils(db="protein", rettype="gp")
query = '"VWf"[gene] AND homo[orgn]'
records = db[query].readlines()

parser = RichGenbankParser(records)
acc2seq = {}
rows = []  # not used by the code visible here; kept so the name stays defined
for accession, seq in parser:
    # Skip records shorter than 2800.
    if len(seq) < 2800:
        continue
    species = seq.Info.species.split()
    # Tag = initial of the first word + first three letters of the second
    # word of the species string (the lookup below expects 'Hsap').
    seq_name = "%s.%s" % (species[0][0] + species[1][:3], accession)
    acc2seq[seq_name] = seq

# Single-argument print(...) behaves the same as a statement and as a
# function call, so the script parses on both Python 2 and Python 3.
print(acc2seq)

seqs = LoadSeqs(data=acc2seq, aligned=False)
sh = seqs.NamedSeqs['Hsap.P04275']
print(sh.toFasta())
print(sh.Info.taxonomy)