def _itisterms(endpt, args=None, **kwargs):
    '''
    Get itis term

    Requests ``itis_base + endpt`` and returns one row per
    ``ax21:itisTerms`` element found in the response.

    :param endpt: Endpoint name appended to ``itis_base``
    :param args: Query parameters used as the request payload
        (defaults to an empty dict)
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``pandas.DataFrame`` with one row per term. Columns follow
        the key order of the first term. When a key occurs more than once
        inside a term its values are joined with ``,``. An empty DataFrame
        is returned when the response holds no terms.

    Usage::

        pytaxize._itisterms("buya")
    '''
    # A fresh dict per call; a shared mutable default would leak between calls.
    payload = {} if args is None else args
    out = Refactor(itis_base + endpt, payload=payload, request='get').xml(**kwargs)
    nodes = out.xpath("//ax21:itisTerms", namespaces=ns21)
    # NOTE(review): assumes _get_text_single returns a one-entry
    # {tag: text} dict for each child element -- confirm.
    allnodes = [
        [_get_text_single(child) for child in node.getchildren()]
        for node in nodes
    ]
    if not allnodes:
        return pd.DataFrame()

    rows = []
    first_order = None
    for fields in allnodes:
        # Group values by key, remembering first-seen order, so every
        # duplicated key is merged (not only the last one processed).
        order = []
        grouped = {}
        for field in fields:
            for key, value in field.items():
                if key not in grouped:
                    grouped[key] = []
                    order.append(key)
                grouped[key].append(value)
        if first_order is None:
            first_order = order
        rows.append({
            key: values[0] if len(values) == 1 else ','.join(values)
            for key, values in grouped.items()
        })

    df = pd.DataFrame(rows)
    return df[first_order]
def func(x, y, checklist):
    """
    Fetch child taxa for a name/id from the Catalogue of Life webservice.

    ``format`` and ``start`` are not parameters; they are resolved from the
    surrounding scope when the payload is built.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :param checklist: ``None`` keeps the default ``/col/webservice`` URL;
        any other value is converted with ``str`` and selects an
        ``annual-checklist/<checklist>`` URL
    :return: list of dicts with keys ``id``, ``name`` and ``rank``, filled
        from the text of the first three children of every
        ``//child_taxa//taxon`` element
    :raises SystemExit: via ``sys.exit`` when no child taxa are found
    """
    url = "https://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        checklist = str(checklist)
        if checklist not in ["2012", "2011", "2010"]:
            url = "https://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", checklist, url)
        else:
            url = re.sub("col", "annual-checklist/" + checklist, url)

    candidates = {
        "name": x,
        "id": y,
        "format": format,
        "response": "full",
        "start": start,
    }
    # Entries whose value is None are left out of the request.
    payload = {}
    for field, value in candidates.items():
        if value is not None:
            payload[field] = value

    parsed = Refactor(url, payload, request="get").xml()
    taxa = parsed.xpath("//child_taxa//taxon")
    if len(taxa) == 0:
        sys.exit("Please enter a valid search name")

    labels = ["id", "name", "rank"]
    return [
        dict(zip(labels, [child.text for child in taxon.getchildren()[:3]]))
        for taxon in taxa
    ]
def func(x, y):
    '''
    Fetch child taxa for a name/id from the Catalogue of Life webservice.

    ``checklist``, ``format`` and ``start`` are not parameters; they are
    resolved from the surrounding scope.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :return: list of dicts with keys ``id``, ``name`` and ``rank``, filled
        from the text of the first three children of every
        ``//child_taxa//taxon`` element
    :raises SystemExit: via ``sys.exit`` when no child taxa are found
    '''
    url = "http://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        # Coerce to str so an integer year (e.g. 2012) matches the list
        # below and is a valid replacement string for re.sub. A new local
        # name is used because ``checklist`` lives in the outer scope.
        year = str(checklist)
        if year in ['2012', '2011', '2010']:
            url = re.sub("col", "annual-checklist/" + year, url)
        else:
            url = "http://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", year, url)
    payload = {
        'name': x,
        'id': y,
        'format': format,
        'response': "full",
        'start': start
    }
    tt = Refactor(url, payload, request='get').xml()
    childtaxa = tt.xpath('//child_taxa//taxon')
    if len(childtaxa) == 0:
        sys.exit('Please enter a valid search name')
    labels = ['id', 'name', 'rank']
    return [
        dict(zip(labels, [child.text for child in taxon.getchildren()[:3]]))
        for taxon in childtaxa
    ]
def _itisterms(endpt, args=None, **kwargs):
    '''
    Get itis terms

    Requests ``itis_base + endpt`` and returns one row per
    ``ax21:itisTerms`` element found in the response.

    :param endpt: Endpoint name appended to ``itis_base``
    :param args: Query parameters used as the request payload
        (defaults to an empty dict)
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``pandas.DataFrame`` with one row per term. Columns are the
        keys of the first term in first-seen order; every row is aligned
        to those keys. When a key occurs more than once inside a term its
        values are joined with ``,``. An empty DataFrame is returned when
        the response holds no terms.

    Usage::

        pytaxize._itisterms(endpt, args)
    '''
    # A fresh dict per call; a shared mutable default would leak between calls.
    payload = {} if args is None else args
    out = Refactor(itis_base + endpt, payload=payload, request='get').xml(**kwargs)
    nodes = out.xpath("//ax21:itisTerms", namespaces=ns21)
    # NOTE(review): assumes _get_text_single returns a one-entry
    # {tag: text} dict for each child element -- confirm.
    allnodes = [
        [_get_text_single(child) for child in node.getchildren()]
        for node in nodes
    ]
    if not allnodes:
        return pd.DataFrame()

    columns = None
    rows = []
    for fields in allnodes:
        # Group values by key in first-seen order so merged keys keep
        # their position instead of being appended at the end.
        order = []
        grouped = {}
        for field in fields:
            for key, value in field.items():
                if key not in grouped:
                    grouped[key] = []
                    order.append(key)
                grouped[key].append(value)
        if columns is None:
            columns = order
        merged = {
            key: values[0] if len(values) == 1 else ','.join(values)
            for key, values in grouped.items()
        }
        # Align by key, not by position, so labels always match values.
        rows.append([merged.get(name) for name in columns])

    # One single-row frame per term, concatenated, keeps the original
    # result layout (each row carries index 0).
    df = pd.concat([pd.DataFrame([row]) for row in rows])
    df.columns = columns
    return df
def func(x, y):
    '''
    Search the Catalogue of Life webservice and tabulate the results.

    ``checklist`` and ``start`` are not parameters; they are resolved from
    the surrounding scope.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :return: ``pandas.DataFrame`` with one row per ``//result`` element.
        Each row maps the tag of every element beneath the result to its
        text; a tag that repeats within one result keeps the last text
        seen.
    '''
    url = "http://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        # Coerce to str so an integer year (e.g. 2012) matches the list
        # below and is a valid replacement string for re.sub. A new local
        # name is used because ``checklist`` lives in the outer scope.
        year = str(checklist)
        if year in ['2012', '2011', '2010']:
            url = re.sub("col", "annual-checklist/" + year, url)
        else:
            url = "http://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", year, url)
    payload = {'name': x, 'id': y, 'start': start}
    tt = Refactor(url, payload, request='get').xml()
    outlist = []
    for result in tt.xpath('//result'):
        each = {}
        for child in result:
            # iter() yields the child itself followed by its descendants.
            for element in child.iter():
                each[element.tag] = element.text
        outlist.append(each)
    return pd.DataFrame(outlist)
def searchcol(x):
    '''
    Request child taxa for the name ``x`` and tabulate them.

    ``url``, ``format`` and ``start`` are not parameters; they are resolved
    from the surrounding scope.

    :param x: Value sent as ``name``
    :return: ``pandas.DataFrame`` with columns ``id``, ``name`` and
        ``rank``, filled from the text of the first three children of
        every ``//child_taxa//taxon`` element
    '''
    query = {'name': x, 'format': format, 'response': "full", 'start': start}
    parsed = Refactor(url, query, request='get').xml()
    rows = [
        [child.text for child in taxon.getchildren()[:3]]
        for taxon in parsed.xpath('//child_taxa//taxon')
    ]
    return pd.DataFrame(rows, columns=['id', 'name', 'rank'])
def searchbycommonnameendswith(x, **kwargs):
    '''
    Search for tsn by common name ending with

    :param x: Search key, sent as ``srchKey``
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Result of ``_itisdf`` applied to the first
        ``ax21:commonNames`` element of the response
    :raises IndexError: when the response has no ``ax21:commonNames``
        element

    Usage::

        pytaxize.searchbycommonnameendswith("snake")
    '''
    fields = ["commonName", "language", "tsn"]
    response = Refactor(
        itis_base + 'searchByCommonNameEndsWith',
        payload={'srchKey': x},
        request='get',
    ).xml(**kwargs)
    first_hit = response.xpath('//ax21:commonNames', namespaces=ns21)[0]
    return _itisdf(first_hit, ns21, fields, _tolower(fields))
def getgeographicvalues(**kwargs):
    '''
    Get all possible geographic values

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``pandas.DataFrame`` with a single ``geographicvalues``
        column holding the text of every ``geographicValues`` element

    Usage::

        pytaxize.getgeographicvalues()
    '''
    out = Refactor(itis_base + 'getGeographicValues', payload={}, request='get').xml(**kwargs)
    # The ax21 prefix is bound locally to the metadata namespace URI.
    ns = {'ax21': 'http://metadata.itis_service.itis.usgs.gov/xsd'}
    nodes = out.xpath("//ax21:geographicValues", namespaces=ns)
    return pd.DataFrame([node.text for node in nodes], columns=['geographicvalues'])
def getcredibilityratings(**kwargs):
    '''
    Get possible credibility ratings

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``pandas.DataFrame`` with a single ``credibilityValues``
        column holding the text of every ``ax23:credibilityValues`` element

    Usage::

        pytaxize.getcredibilityratings()
    '''
    out = Refactor(itis_base + 'getCredibilityRatings', payload={}, request='get').xml(**kwargs)
    nodes = out.xpath("//ax23:credibilityValues", namespaces=ns23)
    return pd.DataFrame([node.text for node in nodes], columns=['credibilityValues'])
def ncbi(self, x, **kwargs):
    """
    Look up GenBank common names for a taxonomy id via NCBI efetch.

    :param x: Value sent as ``ID``; ``None`` short-circuits to ``[]``
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: list with the text of every
        ``//TaxaSet/Taxon/OtherNames/GenbankCommonName`` element
    :raises Exception: when the ``ENTREZ_KEY`` environment variable is
        not set
    """
    if x is None:
        return []

    api_key = os.environ.get("ENTREZ_KEY")
    if api_key is None:
        raise Exception("ENTREZ_KEY is not defined")

    response = Refactor(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
        {"db": "taxonomy", "ID": x, "api_key": api_key},
        "get",
    ).xml(**kwargs)
    common_names = response.xpath("//TaxaSet/Taxon/OtherNames/GenbankCommonName")
    return [node.text for node in common_names]
def searchbycommonname(x, **kwargs):
    '''
    Search for tsn by common name

    :param x: Search key, sent as ``srchKey``
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Result of ``_itisdf`` applied to the first
        ``ax21:commonNames`` element of the response
    :raises IndexError: when the response has no ``ax21:commonNames``
        element

    Usage::

        pytaxize.searchbycommonname(x="american bullfrog")
        pytaxize.searchbycommonname("ferret-badger")
        pytaxize.searchbycommonname("polar bear")
    '''
    wanted = ["commonName", "language", "tsn"]
    endpoint = itis_base + 'searchByCommonName'
    parsed = Refactor(endpoint, payload={'srchKey': x}, request='get').xml(**kwargs)
    hits = parsed.xpath('//ax21:commonNames', namespaces=ns21)
    head = hits[0]
    return _itisdf(head, ns21, wanted, _tolower(wanted))
def getcredibilityratings(**kwargs):
    '''
    Get possible credibility ratings

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: list with the text of every ``ax23:credibilityValues`` element

    Usage::

        pytaxize.getcredibilityratings()
    '''
    endpoint = itis_base + 'getCredibilityRatings'
    parsed = Refactor(endpoint, payload={}, request='get').xml(**kwargs)
    ratings = []
    for node in parsed.xpath("//ax23:credibilityValues", namespaces=ns23):
        ratings.append(node.text)
    return ratings
def getlastchangedate(**kwargs):
    '''
    Provides the date the ITIS database was last updated.

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``time.struct_time`` parsed with ``%Y-%m-%d`` from the first
        whitespace-separated token of the first ``updateDate`` element
    :raises IndexError: when the response has no ``updateDate`` element

    Usage::

        pytaxize.getlastchangedate()
    '''
    response = Refactor(itis_base + 'getLastChangeDate', payload={}, request='get').xml(**kwargs)
    namespaces = {'ax23': "http://metadata.itis_service.itis.usgs.gov/xsd"}
    stamp = response.xpath("//ax23:updateDate", namespaces=namespaces)[0].text
    # Only the leading date token is parsed; anything after it is ignored.
    date_part = stamp.split()[0]
    return time.strptime(date_part, "%Y-%m-%d")
def itis_ping(**kwargs):
    '''
    Ping the ITIS API

    Calls the ``getDescription`` endpoint and returns the text of the
    first ``description`` element in the response.

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Text of the first ``ax26:description`` element
    :raises IndexError: when the response has no ``description`` element

    Usage::

        import pytaxize
        pytaxize.itis_ping()
    '''
    namespaces = {'ax26': 'http://itis_service.itis.usgs.gov/xsd'}
    response = Refactor(itis_base + 'getDescription', payload={}, request='get').xml(**kwargs)
    descriptions = response.xpath('//ax26:description', namespaces=namespaces)
    texts = [node.text for node in descriptions]
    return texts[0]
def getgeographicvalues(**kwargs):
    '''
    Get all possible geographic values

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: list with the text of every ``geographicValues`` element

    Usage::

        pytaxize.getgeographicvalues()
    '''
    # The ax21 prefix is bound locally to the metadata namespace URI.
    namespaces = {'ax21': 'http://metadata.itis_service.itis.usgs.gov/xsd'}
    parsed = Refactor(itis_base + 'getGeographicValues', payload={}, request='get').xml(**kwargs)
    values = []
    for node in parsed.xpath("//ax21:geographicValues", namespaces=namespaces):
        values.append(node.text)
    return values
def itis_ping(**kwargs):
    '''
    Ping the ITIS API

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Text of the first ``ax26:description`` element returned by
        the ``getDescription`` endpoint
    :raises IndexError: when the response has no ``description`` element

    Usage::

        import pytaxize
        pytaxize.itis_ping()
    '''
    endpoint = itis_base + 'getDescription'
    parsed = Refactor(endpoint, payload={}, request='get').xml(**kwargs)
    found = parsed.xpath(
        '//ax26:description',
        namespaces={'ax26': 'http://itis_service.itis.usgs.gov/xsd'},
    )
    all_text = [element.text for element in found]
    return all_text[0]
def searchcol(x):
    '''
    Request child taxa for the name ``x`` and tabulate them.

    ``url``, ``format`` and ``start`` are not parameters; they are resolved
    from the surrounding scope.

    :param x: Value sent as ``name``
    :return: ``pandas.DataFrame`` with columns ``id``, ``name`` and
        ``rank``, filled from the text of the first three children of
        every ``//child_taxa//taxon`` element
    '''
    params = {
        'name': x,
        'format': format,
        'response': "full",
        'start': start
    }
    document = Refactor(url, params, request='get').xml()
    records = []
    for taxon in document.xpath('//child_taxa//taxon'):
        leading = taxon.getchildren()[:3]
        records.append([node.text for node in leading])
    return pd.DataFrame(records, columns=['id', 'name', 'rank'])
def getlastchangedate(**kwargs):
    '''
    Provides the date the ITIS database was last updated.

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: ``time.struct_time`` parsed with ``%Y-%m-%d`` from the first
        whitespace-separated token of the first ``updateDate`` element
    :raises IndexError: when the response has no ``updateDate`` element

    Usage::

        pytaxize.getlastchangedate()
    '''
    endpoint = itis_base + 'getLastChangeDate'
    parsed = Refactor(endpoint, payload={}, request='get').xml(**kwargs)
    matches = parsed.xpath(
        "//ax23:updateDate",
        namespaces={'ax23': "http://metadata.itis_service.itis.usgs.gov/xsd"},
    )
    raw = matches[0].text
    # Keep only the date token that precedes the first whitespace.
    day = raw.split()[0]
    return time.strptime(day, "%Y-%m-%d")
def getcredibilityratings(**kwargs):
    '''
    Get possible credibility ratings

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: list with the text of every ``ax23:credibilityValues`` element

    Usage::

        pytaxize.getcredibilityratings()
    '''
    response = Refactor(
        itis_base + 'getCredibilityRatings',
        payload={},
        request='get',
    ).xml(**kwargs)
    rating_nodes = response.xpath("//ax23:credibilityValues", namespaces=ns23)
    return [rating.text for rating in rating_nodes]
def itis_ping(**kwargs):
    '''
    Ping the ITIS API

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Text of the first ``ax26:description`` element returned by
        the ``getDescription`` endpoint
    :raises IndexError: when the response has no ``description`` element

    Usage::

        import pytaxize
        pytaxize.itis_ping()
    '''
    ax26 = {'ax26': 'http://itis_service.itis.usgs.gov/xsd'}
    reply = Refactor(
        itis_base + 'getDescription',
        payload={},
        request='get',
    ).xml(**kwargs)
    collected = []
    for node in reply.xpath('//ax26:description', namespaces=ax26):
        collected.append(node.text)
    return collected[0]
def searchbycommonnameendswith(x, **kwargs):
    '''
    Search for tsn by common name ending with

    :param x: Search key, sent as ``srchKey``
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Result of ``_itisdf`` applied to the first
        ``ax21:commonNames`` element of the response
    :raises IndexError: when the response has no ``ax21:commonNames``
        element

    Usage::

        pytaxize.searchbycommonnameendswith("snake")
    '''
    query = {'srchKey': x}
    document = Refactor(itis_base + 'searchByCommonNameEndsWith', payload=query, request='get').xml(**kwargs)
    columns = ["commonName", "language", "tsn"]
    name_nodes = document.xpath('//ax21:commonNames', namespaces=ns21)
    leading = name_nodes[0]
    return _itisdf(leading, ns21, columns, _tolower(columns))
def getgeographicvalues(**kwargs):
    '''
    Get all possible geographic values

    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: list with the text of every ``geographicValues`` element

    Usage::

        pytaxize.getgeographicvalues()
    '''
    response = Refactor(
        itis_base + 'getGeographicValues',
        payload={},
        request='get',
    ).xml(**kwargs)
    # The ax21 prefix is bound locally to the metadata namespace URI.
    geo_nodes = response.xpath(
        "//ax21:geographicValues",
        namespaces={'ax21': 'http://metadata.itis_service.itis.usgs.gov/xsd'},
    )
    return [geo.text for geo in geo_nodes]
def searchbycommonname(x, **kwargs):
    '''
    Search for tsn by common name

    :param x: Search key, sent as ``srchKey``
    :param **kwargs: Forwarded unchanged to ``Refactor(...).xml()``
    :return: Result of ``_itisdf`` applied to the first
        ``ax21:commonNames`` element of the response
    :raises IndexError: when the response has no ``ax21:commonNames``
        element

    Usage::

        pytaxize.searchbycommonname(x="american bullfrog")
        pytaxize.searchbycommonname("ferret-badger")
        pytaxize.searchbycommonname("polar bear")
    '''
    response = Refactor(
        itis_base + 'searchByCommonName',
        payload={'srchKey': x},
        request='get',
    ).xml(**kwargs)
    keys = ["commonName", "language", "tsn"]
    first_match = response.xpath('//ax21:commonNames', namespaces=ns21)[0]
    return _itisdf(first_match, ns21, keys, _tolower(keys))
def ubio_search(searchName = None, searchAuth = None, searchYear = None, order = None,
                sci = None, vern = None, keyCode = None):
    '''
    ubio_search returns NameBankIDs that match search terms

    :param searchName: (string) - term to search within name string
    :param searchAuth: (string) - term to search within name authorship
    :param searchYear: (string) - term to search within name year
    :param order: (string) - (name or namebankID) field by which the results
        will be sorted (default is namebankID)
    :param sci: (int) - sent as the ``sci`` query parameter; presumably 1/0
        to include/exclude scientific names -- confirm against the uBio docs
    :param vern: (int) - sent as the ``vern`` query parameter; presumably 1/0
        to include/exclude vernacular names -- confirm against the uBio docs
    :param keyCode: Your uBio API key. When omitted, the key built into this
        function is used. If you don't have one, obtain one at
        http://www.ubio.org/index.php?pagename=form.
    :return: ``pandas.DataFrame`` with columns ``namebankID``,
        ``nameString``, ``fullNameString``, ``packageID``, ``packageName``,
        ``basionymunit``, ``rankID`` and ``rankName``, filled from the text
        of the first eight children of every ``//value`` element
    :raises SystemExit: via ``sys.exit`` when the response has no
        ``//value`` element

    Usage::

        import pytaxize
        pytaxize.ubio_search(searchName = 'elephant', sci = 1, vern = 0)
        pytaxize.ubio_search(searchName = 'Astragalus aduncus', sci = 1, vern = 0)
    '''
    url = "http://www.ubio.org/webservices/service.php"
    # NOTE(review): hard-coded credential kept only as the backward-compatible
    # fallback; it should be moved out of the source.
    ubioApiKey = "b052625da5f330e334471f8efe725c07bf4630a6"
    payload = {
        'function': 'namebank_search',
        'searchName': searchName,
        'searchAuth': searchAuth,
        'searchYear': searchYear,
        'order': order,
        'sci': sci,
        'vern': vern,
        # Honour the caller's key; previously keyCode was silently ignored.
        'keyCode': ubioApiKey if keyCode is None else keyCode,
    }
    tt = Refactor(url, payload, request='get').xml()
    nodes = tt.xpath('//value')
    if len(nodes) == 0:
        sys.exit('Please enter a valid searchName')
    outlist = [[child.text for child in node.getchildren()[:8]] for node in nodes]
    df = pd.DataFrame(
        outlist,
        columns=['namebankID', 'nameString', 'fullNameString', 'packageID',
                 'packageName', 'basionymunit', 'rankID', 'rankName'])
    return df
def func(x, y):
    """
    Search the Catalogue of Life webservice and collect the results.

    ``checklist`` and ``start`` are not parameters; they are resolved from
    the surrounding scope.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :return: list with one dict per ``//result`` element. Each dict maps
        the tag of every element beneath the result to its text; a tag
        that repeats within one result keeps the last text seen.
    """
    url = "https://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        # Coerce to str so an integer year (e.g. 2012) matches the list
        # below and is a valid replacement string for re.sub. A new local
        # name is used because ``checklist`` lives in the outer scope.
        year = str(checklist)
        if year in ["2012", "2011", "2010"]:
            url = re.sub("col", "annual-checklist/" + year, url)
        else:
            url = "https://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", year, url)
    payload = {"name": x, "id": y, "start": start}
    tt = Refactor(url, payload, request="get").xml()
    outlist = []
    for result in tt.xpath("//result"):
        each = {}
        for child in result:
            # iter() yields the child itself followed by its descendants.
            for element in child.iter():
                each[element.tag] = element.text
        outlist.append(each)
    return outlist
def func(x, y):
    '''
    Fetch child taxa for a name/id from the Catalogue of Life webservice.

    ``checklist``, ``format`` and ``start`` are not parameters; they are
    resolved from the surrounding scope.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :return: ``pandas.DataFrame`` with columns ``id``, ``name`` and
        ``rank``, filled from the text of the first three children of
        every ``//child_taxa//taxon`` element
    :raises SystemExit: via ``sys.exit`` when no child taxa are found
    '''
    url = "http://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        # Coerce to str so an integer year (e.g. 2012) matches the list
        # below and is a valid replacement string for re.sub. A new local
        # name is used because ``checklist`` lives in the outer scope.
        year = str(checklist)
        if year in ['2012', '2011', '2010']:
            url = re.sub("col", "annual-checklist/" + year, url)
        else:
            url = "http://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", year, url)
    payload = {'name': x, 'id': y, 'format': format, 'response': "full", 'start': start}
    tt = Refactor(url, payload, request='get').xml()
    childtaxa = tt.xpath('//child_taxa//taxon')
    if len(childtaxa) == 0:
        sys.exit('Please enter a valid search name')
    outlist = [
        [child.text for child in taxon.getchildren()[:3]]
        for taxon in childtaxa
    ]
    return pd.DataFrame(outlist, columns=['id', 'name', 'rank'])
def func(x, y):
    '''
    Search the Catalogue of Life webservice and collect the results.

    ``checklist`` and ``start`` are not parameters; they are resolved from
    the surrounding scope.

    :param x: Value sent as ``name``
    :param y: Value sent as ``id``
    :return: list with one dict per ``//result`` element. Each dict maps
        the tag of every element beneath the result to its text; a tag
        that repeats within one result keeps the last text seen.
    '''
    url = "http://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        # Coerce to str so an integer year (e.g. 2012) matches the list
        # below and is a valid replacement string for re.sub. A new local
        # name is used because ``checklist`` lives in the outer scope.
        year = str(checklist)
        if year in ['2012', '2011', '2010']:
            url = re.sub("col", "annual-checklist/" + year, url)
        else:
            url = "http://www.catalogueoflife.org/annual-checklist/year/webservice"
            url = re.sub("year", year, url)
    payload = {'name': x, 'id': y, 'start': start}
    tt = Refactor(url, payload, request='get').xml()
    outlist = []
    for result in tt.xpath('//result'):
        each = {}
        for child in result:
            # iter() yields the child itself followed by its descendants.
            for element in child.iter():
                each[element.tag] = element.text
        outlist.append(each)
    return outlist