def findName(accession):
    """Look up the scientific name of the organism an accession belongs to.

    The accession is searched in the "gene" database first and, only if that
    yields no IDs, in the "protein" database.  The first ID found is linked
    to the "taxonomy" database, the linked taxonomy record is fetched, and
    the text of its ``Taxon/ScientificName`` element is returned.

    :param accession: search term handed to ``EUtils.ESearch``.
    :return: text of the ``ScientificName`` element, or ``None`` when the
        accession is found in neither database (an error line is printed
        in that case).
    """
    s = EUtils()

    # Try the databases in priority order and keep the first hit; the name
    # of the database that matched is needed later as ELink's ``dbfrom``.
    theID = None
    geneOrProtein = "gene"
    for database in ("gene", "protein"):
        res = s.ESearch(database, accession)
        if res["idlist"]:
            theID = res["idlist"][0]
            geneOrProtein = database
            break

    # Couldn't find in either database: report it and stop here rather than
    # sending a link request with ``id=None``.
    if not theID:
        print("ERROR: couldn't find link for %s" % accession)
        return None

    # Get link to the corresponding ID in the Taxonomy database
    link = s.ELink(db="taxonomy", dbfrom=geneOrProtein, id=theID, retmode="json")
    taxID = json.loads(link)["linksets"][0]["linksetdbs"][0]["links"][0]

    # Download the taxonomy record and pull the name out of the XML
    tax = s.EFetch(db="taxonomy", id=taxID)
    tree = ElementTree.fromstring(tax)
    return tree.find("Taxon").find("ScientificName").text
def to_genbank(self, retmax=10000):
    """Draft: from a TaxID, uses EUtils to retrieve the GenBank identifiers

    Searches the ``nucleotide`` database for ``txid<taxid>[Organism:exp]``
    (``<taxid>`` being ``self.taxid``) and hands the resulting ID list to
    ``ESummary``.

    :param retmax: forwarded as ``retmax`` to both the search and the
        summary request.
    :return: the value returned by ``EUtils.ESummary`` for the matched IDs.

    :Inspiration: https://gist.github.com/fjossinet/5673672
    """
    from bioservices import EUtils

    eutils = EUtils()
    query = 'txid%s[Organism:exp]' % self.taxid
    # NOTE(review): NCBI's paging parameter is spelled ``retstart``; confirm
    # whether ``restart`` is intentional before changing it.
    found = eutils.ESearch(db='nucleotide', term=query, restart=0, retmax=retmax)
    return eutils.ESummary(db='nucleotide', id=found['idlist'], retmax=retmax)
def get_entrez_summary(gene_id):
    """Fetch the "gene" database record(s) matching *gene_id* as text.

    Runs an ``ESearch`` on the "gene" database with *gene_id* as the term,
    passes the returned ID list to ``EFetch``, and decodes the fetched
    payload as UTF-8.

    :param gene_id: search term handed to ``EUtils.ESearch``.
    :return: the ``EFetch`` payload decoded to ``str``.
    """
    client = EUtils()
    matches = client.ESearch('gene', gene_id)
    payload = client.EFetch('gene', matches['idlist'])
    return payload.decode("utf-8")