Example #1
0
def findName(accession):
    """Return the scientific name of the organism an accession belongs to.

    The accession is searched in the Entrez "gene" database first and, if
    that yields no IDs, in the "protein" database.  The first ID found is
    linked to the "taxonomy" database with ``ELink``, the linked taxonomy
    record is downloaded with ``EFetch``, and the text of its
    ``Taxon/ScientificName`` element is returned.

    :param accession: search term handed to ``ESearch``.
    :return: text of the ``ScientificName`` element, or ``None`` (after
        printing an error message) when neither database returns an ID.
    """
    s = EUtils()
    theID = None
    geneOrProtein = "gene"

    # Try the gene database first, then fall back to the protein database.
    for database in ("gene", "protein"):
        idlist = s.ESearch(database, accession)["idlist"]
        if len(idlist) > 0:
            theID = idlist[0]
            geneOrProtein = database
            break

    # Couldn't find in either database: stop here instead of sending a
    # meaningless ELink request with no ID.
    if not theID:
        print("ERROR: couldn't find link for %s" % accession)
        return None

    # Get link to the corresponding ID in the Taxonomy database
    link = s.ELink(db="taxonomy",
                   dbfrom=geneOrProtein,
                   id=theID,
                   retmode="json")
    taxID = json.loads(link)["linksets"][0]["linksetdbs"][0]["links"][0]

    # Download taxonomy record and pull the scientific name out of the XML
    tax = s.EFetch(db="taxonomy", id=taxID)
    tree = ElementTree.fromstring(tax)
    taxonTag = tree.find("Taxon")
    sciName = taxonTag.find("ScientificName")
    return sciName.text
Example #2
0
    def to_genbank(self, retmax=10000):
        """Draft: from a TaxID, uses EUtils to retrieve the GenBank records.

        Searches the Entrez ``nucleotide`` database with the term
        ``txid<self.taxid>[Organism:exp]`` and passes the resulting list of
        identifiers to ``ESummary``.

        :param int retmax: maximum number of records requested from both
            the ``ESearch`` and the ``ESummary`` call.
        :return: whatever ``EUtils.ESummary`` returns for the matched
            identifiers.

        :Inspiration: https://gist.github.com/fjossinet/5673672
        """
        from bioservices import EUtils
        e = EUtils()
        # The E-utilities paging offset is spelled ``retstart``; the former
        # ``restart`` keyword is not an ESearch parameter.
        idlist = e.ESearch(db='nucleotide',
                           term='txid%s[Organism:exp]' % self.taxid,
                           retstart=0,
                           retmax=retmax)['idlist']
        results = e.ESummary(db='nucleotide', id=idlist, retmax=retmax)
        return results
Example #3
0
def get_entrez_summary(gene_id):
    """Fetch the Entrez "gene" record(s) matching *gene_id* as text.

    Runs ``ESearch`` on the "gene" database with *gene_id* as the term,
    feeds the returned ID list to ``EFetch`` and decodes the fetched
    payload as UTF-8.

    :param gene_id: search term handed to ``ESearch``.
    :return: the ``EFetch`` payload decoded to ``str``.
    """
    client = EUtils()
    matching_ids = client.ESearch('gene', gene_id)['idlist']
    raw_record = client.EFetch('gene', matching_ids)
    return raw_record.decode("utf-8")