Example #1
0
 def test_get_from_taxonomy_db(self):
     """EUtils access from taxonomy database should work

     Queries the taxonomy database for two taxon ids in brief text format
     and compares the sorted lines of the response with the two expected
     scientific names.
     """
     # note: this is more fragile than the nucleotide databases
     g = EUtils(db='taxonomy', rettype='Brief', retmode='text')
     ids = '9606[taxid] OR 28901[taxid]'
     # sort so the comparison does not depend on the order of the reply
     result = sorted(g[ids].read().splitlines())
     # taxid 9606 -> Homo sapiens, taxid 28901 -> Salmonella enterica
     self.assertEqual(result, ['Homo sapiens', 'Salmonella enterica'])
Example #2
0
 def test_get_list(self):
     """EUtils access of a list should work

     Fetches three protein accessions in a single request and checks that
     the response contains exactly three lines starting with 'LOCUS'.
     """
     g = EUtils(db='protein', rettype='gp')
     result = g['NP_003320', 'NP_003321', 'NP_003322'].read()
     # build a real list: len() of a lazy filter object fails on Python 3
     loci = [line for line in result.splitlines()
             if line.startswith('LOCUS')]
     self.assertEqual(len(loci), 3)
Example #3
0
    def test_get_slice(self):
        """EUtils access of a slice should work

        The same accession slice is requested twice: once with the plain
        settings and once with url_limit=2, which limits the esearch term
        length. Each response must contain exactly three lines starting
        with 'LOCUS'.
        """
        # one pass per configuration instead of two copy-pasted halves
        for extra_args in ({}, {'url_limit': 2}):
            g = EUtils(db='protein', rettype='gp', retmax=1, **extra_args)
            result = g['NP_003320':'NP_003322'].read()
            # build a real list: len() of a lazy filter object fails on
            # Python 3
            loci = [line for line in result.splitlines()
                    if line.startswith('LOCUS')]
            self.assertEqual(len(loci), 3)
Example #4
0
 def test_query_max_recs_gt_retmax(self):
     """EUtils should stop query at max_recs when max_recs > retmax

     With retmax=3 and max_recs=5 the response is expected to hold
     exactly five lines.
     """
     g = EUtils(db='protein',
                rettype='gi',
                max_recs=5,
                DEBUG=False,
                retmax=3)
     # organism term spelled out in full ('homo')
     result = g['homo[organism] AND myh7'].read().splitlines()
     self.assertEqual(len(result), 5)
Example #5
0
 def test_get_from_taxonomy_db(self):
     """EUtils access from taxonomy database should work

     Fetches two taxonomy records as XML, parses them with
     parse_taxonomy_using_elementtree_xml_parse and compares the sorted
     'ScientificName' values with the two expected names.
     """
     # note: this is more fragile than the nucleotide databases
     g = EUtils(db='taxonomy', rettype='xml', retmode='xml')
     ids = '9606[taxid] OR 28901[taxid]'
     # the parser is handed a file-like object, so stage the response in
     # an in-memory buffer and rewind it before parsing
     fh = StringIO()
     fh.write(g[ids].read())
     fh.seek(0)
     data = parse_taxonomy_using_elementtree_xml_parse(fh)
     result = sorted(item['ScientificName'] for item in data)
     # taxid 9606 -> Homo sapiens, taxid 28901 -> Salmonella enterica
     self.assertEqual(result, ['Homo sapiens', 'Salmonella enterica'])
Example #6
0
 def test_parse_taxonomy_using_elementtree_xml_parse(self):
     """parse_taxonomy_using_elementtree_xml_parse should expose taxonomy fields

     Checks the Lineage, TaxId, ScientificName and Rank entries of the
     first parsed record for taxid 28901.
     """
     fetcher = EUtils(db='taxonomy', rettype='xml', retmode='xml')
     # stage the XML response in an in-memory buffer rewound to the start
     buf = StringIO()
     buf.write(fetcher['28901[taxid]'].read())
     buf.seek(0)
     record = parse_taxonomy_using_elementtree_xml_parse(buf)[0]
     wanted = ('Lineage', 'TaxId', 'ScientificName', 'Rank')
     observed = tuple(record[key] for key in wanted)
     expected = (
         'cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales; Enterobacteriaceae; Salmonella',
         '28901',
         'Salmonella enterica',
         'species',
     )
     self.assertEqual(observed, expected)
Example #7
0
def fetch_seq(gid, gb_fn):
    """Fetch a nucleotide record in GenBank format and write it to a file.

    Arguments:
        gid: key used to index the EUtils handle for the 'nucleotide'
            database.
        gb_fn: path of the output file; it is opened in 'w' mode, so any
            existing content is replaced.
    """
    e = EUtils(db="nucleotide", rettype="gb")
    # download before opening the file so a failed fetch does not leave
    # gb_fn truncated
    record = e[gid].read()
    # the with-block closes the handle even if the write raises
    with open(gb_fn, 'w') as outfile:
        outfile.write(record)
Example #8
0
 def test_query_retmax(self):
     """EUtils should join results taken retmax at a time

     With retmax=3 the joined response must hold more than one line and
     must include the known gi listed below.
     """
     g = EUtils(db='protein', rettype='gi', retmax=3, DEBUG=False)
     # organism term spelled out in full ('homo')
     result = g['homo[organism] AND myh7'].read().splitlines()
     assert len(result) > 1
     assert '83304912' in result  # gi of human myh7
Example #9
0
 def test_query(self):
     """EUtils access via a query should work

     Runs a free-text query against the protein database and checks that
     the known gi listed below is among the returned lines.
     """
     g = EUtils(db='protein', rettype='gi', retmax=100)
     # organism term spelled out in full ('homo')
     result = g['homo[organism] AND erf1[ti]'].read().splitlines()
     assert '5499721' in result  # gi of human eRF1
Example #10
0
 def test_simple_get(self):
     """EUtils should fetch a single record when indexed by one accession

     The returned text must begin with 'LOCUS' and mention the accession.
     """
     accession = 'NP_003320'
     record = EUtils(db='protein', rettype='gp')[accession].read()
     assert record.startswith('LOCUS')
     assert accession in record
Example #11
0
#!/usr/bin/env python
# taken from http://pycogent.sourceforge.net/
"""Download VWf protein records from NCBI and print one of the sequences.

Records are fetched in 'gp' format, parsed with RichGenbankParser, filtered
by length and collected under short '<species tag>.<accession>' names. The
script then prints the collection and the FASTA text and taxonomy of the
'Hsap.P04275' entry.
"""
import re

from cogent import LoadSeqs
from cogent.db.ncbi import EUtils
from cogent.parse.genbank import RichGenbankParser

db = EUtils(db="protein", rettype="gp")
# organism term spelled out in full ('homo')
query = '"VWf"[gene] AND homo[orgn]'
records = db[query].readlines()

parser = RichGenbankParser(records)
acc2seq = {}
for accession, seq in parser:
    # ignore records shorter than 2800
    if len(seq) < 2800:
        continue
    species = seq.Info.species.split()
    # tag = first character of the first word + up to three characters of
    # the second word, e.g. the 'Hsap' used in the lookup below
    seq_name = "%s.%s" % (species[0][0] + species[1][:3], accession)
    acc2seq[seq_name] = seq
# single-argument print(...) behaves the same on Python 2 and Python 3
print(acc2seq)

seqs = LoadSeqs(data=acc2seq, aligned=False)
sh = seqs.NamedSeqs['Hsap.P04275']
print(sh.toFasta())
print(sh.Info.taxonomy)