def _gnr_resolve(
    names="H**o sapiens",
    source=None,
    format="json",
    resolve_once="false",
    with_context="false",
    best_match_only="false",
    header_only="false",
    preferred_data_sources="false",
    http="get",
):
    """Resolve taxonomic names against the Global Names Resolver.

    :param names: a single name (str) or a list of names
    :param source: data source id(s), sent as ``data_source_ids``
    :param format: response format requested from the service
    :param resolve_once: service flag, "true"/"false" string
    :param with_context: service flag, "true"/"false" string
    :param best_match_only: service flag, "true"/"false" string
    :param header_only: service flag, "true"/"false" string
    :param preferred_data_sources: service flag, "true"/"false" string
    :param http: "get" or "post"; forced to "post" for >300 names
    :return: list with one entry per resolved name; each entry is that
        name's ``results`` list, or ``[]`` when the service returned none
    """
    url = "https://resolver.globalnames.org/name_resolvers"
    payload = {
        "data_source_ids": source,
        "format": format,
        "resolve_once": resolve_once,
        "with_context": with_context,
        "best_match_only": best_match_only,
        "header_only": header_only,
        "preferred_data_sources": preferred_data_sources,
    }
    if isinstance(names, list):
        if len(names) > 300 and http == "get":
            # a GET URL with >300 names would be too long; upload instead
            http = "post"
        else:
            names = "|".join(names)
            payload["names"] = names
    else:
        payload["names"] = names
    if http == "get":
        result_json = Refactor(url, payload, request="get").json()
    elif not isinstance(names, list):
        result_json = Refactor(url, payload, request="post").json()
    else:
        # large batches are written to a temp file and uploaded
        with open("names_list.txt", "w") as f:
            for name in names:
                f.write(name + "\n")
        # context manager closes the upload handle (it leaked before)
        with open("names_list.txt", "rb") as fh:
            result_json = Refactor(url, payload, request="post").json(
                files={"file": fh}
            )
        # the upload is processed asynchronously; poll until done
        while result_json["status"] == "working":
            result_url = result_json["url"]
            time.sleep(10)
            result_json = Refactor(result_url, payload={}, request="get").json()
        os.remove("names_list.txt")
    data = []
    for each_result in result_json["data"]:
        data.append(each_result.get("results", []))
    return data
def getacceptednamesfromtsn(tsn, **kwargs):
    '''
    Get accepted names from tsn

    :param tsn: taxonomic serial number (TSN) (character or numeric)
    :param \*\*kwargs: Curl options passed on to the underlying request

    :return: the submitted TSN's name text when it is already accepted;
        otherwise a dict of the accepted-name fields plus ``submittedTsn``

    Usage::

        # TSN accepted - good name
        pytaxize.getacceptednamesfromtsn('208527')

        # TSN not accepted - input TSN is old name
        pytaxize.getacceptednamesfromtsn('504239')
    '''
    out = Refactor(itis_base + 'getAcceptedNamesFromTSN', payload={
        'tsn': tsn
    }, request='get').xml(**kwargs)
    # positional navigation of the SOAP-style XML: first child holds the
    # submitted TSN text; second child either flags "already accepted"
    # (attribute value 'true') or contains the accepted-name nodes
    temp = out.getchildren()
    if (temp[0].getchildren()[1].values()[0] == 'true'):
        # NOTE(review): .values()[0] reads the first XML attribute value;
        # presumably an "acceptedName is null" style flag — confirm against
        # the ITIS response schema
        dat = temp[0].getchildren()[0].text
    else:
        nodes = temp[0].getchildren()[1].getchildren()
        dat = _parse_nodes(nodes)
        # drop the author field from the parsed record
        # NOTE(review): pop without a default raises KeyError if 'author'
        # is ever absent — confirm it is always present in this branch
        dat.pop('author')
        dat['submittedTsn'] = temp[0].getchildren()[0].text
    return dat
def _itisterms(endpt, args={}, **kwargs):
    '''
    Query an ITIS terms endpoint and tabulate the matches.

    :param endpt: ITIS service endpoint name appended to ``itis_base``
    :param args: dict of query parameters forwarded to the service
        (read-only here, so the mutable default is safe)
    :param \*\*kwargs: Curl options passed on to `requests.get`
    :return: pandas DataFrame with one row per itisTerms node, columns
        ordered as in the first node

    Usage::

        pytaxize._itisterms("buya")
    '''
    out = Refactor(itis_base + endpt, payload=args, request='get').xml(**kwargs)
    nodes = out.xpath("//ax21:itisTerms", namespaces=ns21)
    nodes2 = [x.getchildren() for x in nodes]
    # each entry becomes a list of single-key dicts, one per child element
    allnodes = [[_get_text_single(y) for y in x] for x in nodes2]
    output = []
    for x in allnodes:
        # dict.keys()/.values() are views in Python 3 and cannot be
        # subscripted; wrap in list() (the old code only worked on Python 2)
        kyz = [list(y.keys())[0] for y in x]
        notuniq = set([v for v in kyz if kyz.count(v) > 1])
        if len(notuniq) > 0:
            # collapse duplicated keys into one comma-joined value
            for z in notuniq:
                tt = ','.join([list(m.values())[0] for m in x
                               if list(m.keys())[0] == z])
                toadd = {z: tt}
                uu = [v for v in x if list(v.keys())[0] not in z]
                uu.append(toadd)
            output.append(uu)
        else:
            output.append(x)
    df = pd.DataFrame([{k: v for d in R for k, v in d.items()} for R in output])
    # preserve the column order of the first record
    return df[[list(x.keys())[0] for x in allnodes[0]]]
def searchforanymatchpaged(x, pagesize, pagenum, ascend, **kwargs):
    '''
    Search for any matched page for descending (logical)

    :param x: TSN (int) or search string
    :param pagesize: number of records per page
    :param pagenum: page number to fetch
    :param ascend: sort ascending (bool)
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.searchforanymatchpaged(x=202385, pagesize=100, pagenum=1, ascend=False)
        pytaxize.searchforanymatchpaged("Zy", pagesize=100, pagenum=1, ascend=False)
    '''
    payload = {
        'srchKey': x,
        'pageSize': pagesize,
        'pageNum': pagenum,
        'ascend': ascend
    }
    res = Refactor(itis_base + 'searchForAnyMatchPaged',
                   payload=payload,
                   request='get').xml(**kwargs)
    records = []
    for group in res.getchildren()[0].getchildren():
        for node in group.getchildren():
            tags = [gettag(e) for e in node.iter()]
            texts = [e.text for e in node.iter()]
            records.append(dict(zip(tags, texts)))
    return records
def searchforanymatch(x, **kwargs):
    '''
    Search for any match

    :param x: TSN (int) or search string
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.searchforanymatch(x=202385)
        pytaxize.searchforanymatch(x="dolphin")
    '''
    res = Refactor(itis_base + 'searchForAnyMatch',
                   payload={'srchKey': x},
                   request='get').xml(**kwargs)
    records = []
    for group in res.getchildren()[0].getchildren():
        for node in group.getchildren():
            tags = [gettag(e) for e in node.iter()]
            texts = [e.text for e in node.iter()]
            records.append(dict(zip(tags, texts)))
    return records
def jurisdictional_origin(tsn, as_dataframe=False, **kwargs):
    """
    Get jurisdictional origin from tsn

    :param tsn: (int) TSN for a taxonomic group
    :param as_dataframe: (bool) specify return type, if pandas is available
    :param \*\*kwargs: Curl options passed on to `requests.get`

    Usage::

        from pytaxize import itis
        itis.jurisdictional_origin(180543)
        itis.jurisdictional_origin(180543, True)
    """
    res = Refactor(
        itis_base + "getJurisdictionalOriginFromTSN",
        payload={"tsn": tsn},
        request="get",
    ).json(**kwargs)
    res.pop("class")
    origins = res["jurisdictionalOrigins"]
    # a [None] list means the service returned no origin records
    if origins[0] is not None:
        for record in origins:
            record.pop("class")
    return _df(origins, as_dataframe)
def _itisterms(endpt, args={}, as_dataframe=False, **kwargs):
    """Query an ITIS terms endpoint and return the cleaned records.

    :param endpt: ITIS service endpoint appended to ``itis_base``
    :param args: query parameters forwarded to the service
    :param as_dataframe: (bool) specify return type, if pandas is available
    :param \*\*kwargs: Curl options passed on to `requests.get`
    :return: ``{}`` when the service has no matches, otherwise the
        records via ``_df``
    """
    res = Refactor(itis_base + endpt, payload=args, request="get").json(**kwargs)
    terms = res["itisTerms"]
    # a [None] list signals an empty result set
    if terms[0] is None:
        return {}
    for term in terms:
        term.pop("class")
    return _df(terms, as_dataframe)
def hierarchy_down(tsn, as_dataframe=False, **kwargs):
    """
    Get hierarchy down from tsn

    :param tsn: (int) TSN for a taxonomic group
    :param as_dataframe: (bool) specify return type, if pandas is available
    :param \*\*kwargs: Curl options passed on to `requests.get`

    Usage::

        from pytaxize import itis
        itis.hierarchy_down(tsn = 179913)
        itis.hierarchy_down(tsn = 161994)
        itis.hierarchy_down(tsn = 9999999)
    """
    tt = Refactor(
        itis_base + "getHierarchyDownFromTSN",
        payload={"tsn": tsn},
        request="get",
    ).json(**kwargs)
    tt.pop("class")
    # strip the service's "class" bookkeeping key from each child record
    # (the original had a dead `if tt["hierarchyList"]: pass` no-op here,
    # and used a side-effect list comprehension; both cleaned up)
    for z in tt["hierarchyList"]:
        if z is not None:
            z.pop("class")
    return _df(tt["hierarchyList"], as_dataframe)
def currency(tsn, as_dataframe=False, **kwargs):
    """
    Get currency from tsn

    :param tsn: (int) TSN for a taxonomic group
    :param as_dataframe: (bool) specify return type, if pandas is available
    :param \*\*kwargs: Curl options passed on to `requests.get`

    Usage::

        from pytaxize import itis
        # currency data
        itis.currency(28727)
        # no currency dat
        itis.currency(526852)
        # as data_frame
        itis.currency(526852, as_dataframe=True)
    """
    res = Refactor(
        itis_base + "getCurrencyFromTSN",
        payload={"tsn": tsn},
        request="get",
    ).json(**kwargs)
    del res["class"]
    return _df(res, as_dataframe)
def accepted_names(tsn, **kwargs):
    """
    Get accepted names from tsn

    :param tsn: taxonomic serial number (TSN) (character or numeric)
    :param \*\*kwargs: Curl options passed on to `requests.get`

    Usage::

        from pytaxize import itis
        # TSN accepted - good name
        itis.accepted_names(tsn=208527)
        # TSN not accepted - input TSN is old name
        itis.accepted_names(tsn=504239)
    """
    res = Refactor(
        itis_base + "getAcceptedNamesFromTSN",
        payload={"tsn": tsn},
        request="get",
    ).json(**kwargs)
    first = res["acceptedNames"][0]
    # None means the submitted TSN is already the accepted name
    if first is None:
        return {}
    first.pop("class")
    return first
def func(x, y, checklist):
    """Query the Catalogue of Life webservice for child taxa.

    :param x: scientific name to search for
    :param y: COL identifier to search for
    :param checklist: annual-checklist year (str/int) or None for current
    :return: list of dicts with keys id, name, rank

    Note: ``format`` and ``start`` are read from the enclosing scope.
    """
    service = "https://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        checklist = str(checklist)
        if checklist in ["2012", "2011", "2010"]:
            service = re.sub("col", "annual-checklist/" + checklist, service)
        else:
            service = "https://www.catalogueoflife.org/annual-checklist/year/webservice"
            service = re.sub("year", checklist, service)
    payload = {
        "name": x,
        "id": y,
        "format": format,
        "response": "full",
        "start": start,
    }
    # drop unset parameters before the request
    payload = {k: v for k, v in payload.items() if v is not None}
    doc = Refactor(service, payload, request="get").xml()
    taxa = doc.xpath("//child_taxa//taxon")
    if not taxa:
        sys.exit("Please enter a valid search name")
    results = []
    for taxon in taxa:
        children = taxon.getchildren()
        results.append(
            dict(zip(["id", "name", "rank"], [c.text for c in children[:3]])))
    return results
def func(x, y):
    """Query the Catalogue of Life webservice for child taxa.

    :param x: scientific name to search for
    :param y: COL identifier to search for
    :return: list of dicts with keys id, name, rank

    Note: ``checklist``, ``format`` and ``start`` are read from the
    enclosing scope.
    """
    service = "http://www.catalogueoflife.org/col/webservice"
    if checklist is not None:
        if checklist in ['2012', '2011', '2010']:
            service = re.sub("col", "annual-checklist/" + checklist, service)
        else:
            service = "http://www.catalogueoflife.org/annual-checklist/year/webservice"
            service = re.sub("year", checklist, service)
    payload = {
        'name': x,
        'id': y,
        'format': format,
        'response': "full",
        'start': start
    }
    doc = Refactor(service, payload, request='get').xml()
    taxa = doc.xpath('//child_taxa//taxon')
    if len(taxa) == 0:
        sys.exit('Please enter a valid search name')
    results = []
    for taxon in taxa:
        children = taxon.getchildren()
        results.append(
            dict(zip(['id', 'name', 'rank'], [c.text for c in children[:3]])))
    return results
def vascan_search(q, format="json", raw=False):
    """
    Search the CANADENSYS Vascan API.

    :param q: Taxonomic rank, one of species, genus (default), family, order.
    :param format: Number of names to get. Maximum depends on the rank.
    :param raw: Raw data or not (default)
    :param callopts: Further args passed to request

    Usage::

        import pytaxize
        pytaxize.vascan_search(q = ["Helianthus annuus"])
        pytaxize.vascan_search(q = ["Helianthus annuus"], raw=True)
        pytaxize.vascan_search(q = ["Helianthus annuus", "Crataegus dodgei"], raw=True)

        # format type
        ## json
        pytaxize.vascan_search(q = ["Helianthus annuus"], format="json", raw=True)

        ## xml
        pytaxize.vascan_search(q = ["Helianthus annuus"], format="xml", raw=True)

        # lots of names, in this case 50
        splist = pytaxize.names_list(rank='species', size=50)
        pytaxize.vascan_search(q = splist)
    """
    if format == "json":
        url = "http://data.canadensys.net/vascan/api/0.1/search.json"
    else:
        url = "http://data.canadensys.net/vascan/api/0.1/search.xml"
    # multiple names go as one newline-joined POST; a single name as GET
    if len(q) > 1:
        verb = "post"
        payload = {"q": "\n".join(q)}
    else:
        verb = "get"
        payload = {"q": q}
    req = Refactor(url, payload, request=verb)
    return req.json() if format == "json" else req.raw()
def ncbi(self, x, **kwargs):
    """Fetch GenBank common names for a taxon id via NCBI efetch.

    :param x: taxonomy database identifier; None returns []
    :param \*\*kwargs: Curl options passed on to the underlying request
    :raises Exception: when the ENTREZ_KEY environment variable is unset
    :return: list of GenbankCommonName text values
    """
    if x is None:
        return []
    key = os.environ.get("ENTREZ_KEY")
    if key is None:
        raise Exception("ENTREZ_KEY is not defined")
    payload = {"db": "taxonomy", "ID": x, "api_key": key}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    doc = Refactor(url, payload, "get").xml(**kwargs)
    nodes = doc.xpath("//TaxaSet/Taxon/OtherNames/GenbankCommonName")
    return [node.text for node in nodes]
def _gnr_resolve(names='H**o sapiens', source=None, format='json',
                 resolve_once='false', with_context='false',
                 best_match_only='false', header_only='false',
                 preferred_data_sources='false', http='get'):
    """Resolve taxonomic names against the Global Names Resolver.

    :param names: a single name (str) or a list of names
    :param source: data source id(s), sent as ``data_source_ids``
    :param format: response format requested from the service
    :param resolve_once: service flag, "true"/"false" string
    :param with_context: service flag, "true"/"false" string
    :param best_match_only: service flag, "true"/"false" string
    :param header_only: service flag, "true"/"false" string
    :param preferred_data_sources: service flag, "true"/"false" string
    :param http: "get" or "post"; forced to "post" for >300 names
    :return: list with one entry per resolved name; each entry is that
        name's ``results`` list, or ``[]`` when the service returned none
    """
    url = 'http://resolver.globalnames.org/name_resolvers'
    payload = {'data_source_ids': source, 'format': format,
               'resolve_once': resolve_once, 'with_context': with_context,
               'best_match_only': best_match_only, 'header_only': header_only,
               'preferred_data_sources': preferred_data_sources}
    if isinstance(names, list):
        if len(names) > 300 and http == 'get':
            # a GET URL with >300 names would be too long; upload instead
            http = 'post'
        else:
            names = "|".join(names)
            payload['names'] = names
    else:
        payload['names'] = names
    if http == 'get':
        result_json = Refactor(url, payload, request='get').json()
    elif not isinstance(names, list):
        result_json = Refactor(url, payload, request='post').json()
    else:
        # large batches are written to a temp file and uploaded
        with open('names_list.txt', 'w') as f:
            for name in names:
                f.write(name + "\n")
        # context manager closes the upload handle (it leaked before)
        with open('names_list.txt', 'rb') as fh:
            result_json = Refactor(url, payload, request='post').json(
                files={'file': fh})
        # the upload is processed asynchronously; poll until done
        while result_json['status'] == 'working':
            result_url = result_json['url']
            time.sleep(10)
            result_json = Refactor(result_url, payload={}, request='get').json()
        os.remove('names_list.txt')
    data = []
    for each_result in result_json['data']:
        data.append(each_result.get('results', []))
    return data
def getvernacularlanguages(**kwargs):
    '''
    Provides a list of the unique languages used in the vernacular table.

    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getvernacularlanguages()
    '''
    res = Refactor(itis_base + 'getVernacularLanguages',
                   payload={}, request='get').xml(**kwargs)
    fields = ["languageNames"]
    return _itisdf(res, ns23, fields, _tolower(fields), "ax23")
def getranknames(**kwargs):
    '''
    Provides a list of all the unique rank names contained in the database and
    their kingdom and rank ID values.

    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getranknames()
    '''
    res = Refactor(itis_base + 'getRankNames',
                   payload={}, request='get').xml(**kwargs)
    fields = ["kingdomName", "rankId", "rankName"]
    return _itisdf(res, ns23, fields, _tolower(fields), "ax23")
def getkingdomnames(**kwargs):
    '''
    Get all possible kingdom names

    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getkingdomnames()
    '''
    res = Refactor(itis_base + 'getKingdomNames',
                   payload={}, request='get').xml(**kwargs)
    namespaces = {'ax23': "http://metadata.itis_service.itis.usgs.gov/xsd"}
    fields = ["kingdomId", "kingdomName", "tsn"]
    return _itisdf(res, namespaces, fields, _tolower(fields), "ax23")
def getjurisdictionvalues(**kwargs):
    '''
    Get possible jurisdiction values

    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getjurisdictionvalues()
    '''
    res = Refactor(itis_base + 'getJurisdictionValues',
                   payload={}, request='get').xml(**kwargs)
    values = [node.text for node in res.getchildren()[0].getchildren()]
    return pd.DataFrame(values, columns=['jurisdictionValues'])
def getjurisdictionoriginvalues(**kwargs):
    '''
    Get jurisdiction origin values

    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getjurisdictionoriginvalues()
    '''
    res = Refactor(itis_base + 'getJurisdictionalOriginValues',
                   payload={}, request='get').xml(**kwargs)
    namespaces = {'ax23': 'http://metadata.itis_service.itis.usgs.gov/xsd'}
    fields = ["jurisdiction", "origin"]
    # field names are used as-is for both matching and column labels here
    return _itisdf(res, namespaces, fields, fields, "ax23")
def getgeographicdivisionsfromtsn(tsn, **kwargs):
    '''
    Get geographic divisions from tsn

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getgeographicdivisionsfromtsn(180543)
    '''
    res = Refactor(itis_base + 'getGeographicDivisionsFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["geographicValue", "updateDate"]
    return _itis_parse(fields, res, ns21)
def getunacceptabilityreasonfromtsn(tsn, **kwargs):
    '''
    Returns the unacceptability reason, if any, for the TSN.

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getunacceptabilityreasonfromtsn(183671)
    '''
    res = Refactor(itis_base + 'getUnacceptabilityReasonFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["tsn", "unacceptReason"]
    return _itis_parse(fields, res, ns21)
def gettsnbyvernacularlanguage(language, **kwargs):
    '''
    Get tsn by vernacular language not the international language code (character)

    :param language: vernacular language name, e.g. "french"
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.gettsnbyvernacularlanguage("french")
    '''
    res = Refactor(itis_base + 'getTsnByVernacularLanguage',
                   payload={'language': language},
                   request='get').xml(**kwargs)
    fields = ["commonName", "language", "tsn"]
    return _itisdf(res, ns21, fields, _tolower(fields))
def gettaxonomicusagefromtsn(tsn, **kwargs):
    '''
    Returns the usage information for the TSN.

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.gettaxonomicusagefromtsn(526852)
    '''
    res = Refactor(itis_base + 'getTaxonomicUsageFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["taxonUsageRating", "tsn"]
    return _itis_parse(fields, res, ns21)
def gettaxonauthorshipfromtsn(tsn, **kwargs):
    '''
    Returns the author information for the TSN.

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.gettaxonauthorshipfromtsn(183671)
    '''
    res = Refactor(itis_base + 'getTaxonAuthorshipFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["authorship", "updateDate", "tsn"]
    return _itis_parse(fields, res, ns21)
def getreviewyearfromtsn(tsn, **kwargs):
    '''
    Returns the review year for the TSN.

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getreviewyearfromtsn(180541)
    '''
    res = Refactor(itis_base + 'getReviewYearFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["rankId", "reviewYear", "tsn"]
    return _itis_parse(fields, res, ns21)
def getparenttsnfromtsn(tsn, **kwargs):
    '''
    Returns the parent TSN for the entered TSN.

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getparenttsnfromtsn(202385)
    '''
    res = Refactor(itis_base + 'getParentTSNFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["parentTsn", "tsn"]
    return _itis_parse(fields, res, ns21)
def getlastchangedate(**kwargs):
    '''
    Provides the date the ITIS database was last updated.

    :param \*\*kwargs: Curl options passed on to the underlying request
    :return: ``time.struct_time`` parsed from the service's update date

    Usage::

        pytaxize.getlastchangedate()
    '''
    res = Refactor(itis_base + 'getLastChangeDate',
                   payload={}, request='get').xml(**kwargs)
    namespaces = {'ax23': "http://metadata.itis_service.itis.usgs.gov/xsd"}
    node = res.xpath("//ax23:updateDate", namespaces=namespaces)[0]
    # the node text carries a trailing time component; keep the date only
    datestr = node.text.split()[0]
    return time.strptime(datestr, "%Y-%m-%d")
def getcredibilityratings(**kwargs):
    '''
    Get possible credibility ratings

    :param **kwargs: Curl options passed on to `requests.get`

    Usage::

        pytaxize.getcredibilityratings()
    '''
    res = Refactor(itis_base + 'getCredibilityRatings',
                   payload={}, request='get').xml(**kwargs)
    ratings = res.xpath("//ax23:credibilityValues", namespaces=ns23)
    return [node.text for node in ratings]
def getglobalspeciescompletenessfromtsn(tsn, **kwargs):
    '''
    Get global species completeness from tsn

    :param tsn: taxonomic serial number
    :param \*\*kwargs: Curl options passed on to the underlying request

    Usage::

        pytaxize.getglobalspeciescompletenessfromtsn(180541)
    '''
    res = Refactor(itis_base + 'getGlobalSpeciesCompletenessFromTSN',
                   payload={'tsn': tsn},
                   request='get').xml(**kwargs)
    fields = ["completeness", "rankId", "tsn"]
    return _itis_parse(fields, res, ns21)