def test_name_lookup():
    """species.name_lookup - basic test

    Runs a plain ``q="mammalia"`` lookup and checks the overall shape of the
    response: a dict with six top-level entries, one hundred results, and
    "Mammalia" as the canonical name of the first result.
    """
    response = species.name_lookup(q="mammalia")

    assert response.__class__ == dict
    assert len(response) == 6

    hits = response["results"]
    assert len(hits) == 100
    assert hits[0]["canonicalName"] == "Mammalia"
def find_species(original_species_name, rank=None):
    """
    Look up a species name with a GBIF lookup query and return the first
    usable match.

    :param original_species_name: the name of the species we want to find
    :param rank: taxonomy rank, passed through to the lookup query
    :return: the first result dict that contains a ``nubKey`` or
        ``<rank>Key`` entry and whose ``taxonomicStatus`` is ``ACCEPTED``
        or ``SYNONYM``; ``None`` if no result qualifies or the request
        raises ``HTTPError``
    """
    print('Find species : %s' % original_species_name)

    try:
        response = species.name_lookup(
            q=original_species_name,
            limit=10,
            rank=rank,
        )
    except HTTPError:
        print('Species not found')
        return None

    if 'results' not in response:
        return None

    usable_statuses = ('ACCEPTED', 'SYNONYM')
    for candidate in response['results']:
        # The key to look for is derived from the candidate's own rank,
        # e.g. rank "SPECIES" -> "speciesKey".
        candidate_rank_key = candidate.get('rank', '').lower() + 'Key'
        has_key = 'nubKey' in candidate or candidate_rank_key in candidate
        if not has_key or 'taxonomicStatus' not in candidate:
            continue
        if candidate['taxonomicStatus'] in usable_statuses:
            return candidate

    return None
def test_name_lookup_faceting():
    """species.name_lookup - faceting

    Requests only the ``status`` facet (``limit=0``) and checks that no
    results come back while exactly one facet, holding two entries, does.
    """
    response = species.name_lookup(facet="status", limit=0)

    assert response.__class__ == dict
    assert len(response) == 6
    assert len(response["results"]) == 0

    facets = response["facets"]
    assert len(facets) == 1
    assert len(facets[0]) == 2
def parse(name, target_level, taxo_dict):
    """Fill ``taxo_dict`` with the most frequent value found for each level.

    Performs a name lookup for ``name`` and, for every level in ``taxo_dict``
    up to and including ``target_level``, stores the value that occurs most
    often among the returned results (ignoring the "N/A" placeholder used for
    missing values).

    :param name: the name to look up
    :param target_level: the most specific level to fill in; must be one of
        the keys of ``taxo_dict`` (otherwise ``list.index`` raises
        ``ValueError``)
    :param taxo_dict: dict whose keys are the taxonomic levels, ordered from
        least to most specific; it is updated in place
    :return: the same ``taxo_dict`` object; levels for which no usable value
        was found are left untouched
    """
    # Assumption is that taxo_dict has relevant keys and Nones as values
    taxa = list(taxo_dict)

    # Rank messes with species-level lookups, so only use for higher-level searches
    if target_level == 'Species':
        logging.info(
            f'Calling pygbif.species.namelookup with name "{name}"')
        data = species.name_lookup(q=name, limit=1000)
    else:
        logging.info(
            f'Calling pygbif.species.namelookup with name "{name}" and rank "{target_level}"'
        )
        data = species.name_lookup(q=name, rank=target_level, limit=1000)

    results = data['results']
    logging.debug(f'PyGBIF returned {len(results)} results')

    missing_identifier = "N/A"
    if results:  # If data was returned by pygbif
        avc = AttributeValueCounter(results, missing=missing_identifier)
        # slice to exclude levels more specific than the target
        relevant_levels = taxa[:taxa.index(target_level) + 1]
        for level in relevant_levels:
            logging.debug(f"Attribute Value Counts for {level}: " + "\n" +
                          avc.visual_summary(level.lower()))
            rankings = avc[level.lower()]
            # Remove any N/A's from the rankings. A default is supplied so a
            # level without any missing values does not raise KeyError.
            rankings.pop(missing_identifier, None)
            if not rankings:
                # Every result lacked this level: max() would raise
                # ValueError on the empty mapping, so keep the current value.
                logging.debug(f'No usable values for "{level}"; leaving it unchanged')
                continue
            top_result = max(rankings, key=rankings.get)
            logging.debug(f'Top result for "{level}" is "{top_result}"\n')
            taxo_dict[level] = top_result
    # TODO Add some aliases so that 'Metazoa' remaps to 'Animalia' and so on
    return taxo_dict
def get_children(self, parent_taxid, children_rank, habitat=None):
    """Return the accepted child taxa of ``parent_taxid`` at a given rank.

    :param parent_taxid: key of the parent taxon, sent as ``higherTaxonKey``
    :param children_rank: rank of the children to fetch; upper-cased before
        being sent
    :param habitat: optional habitat filter, passed through unchanged
    :return: list of result dicts whose ``taxonomicStatus`` is ``ACCEPTED``.
        At most ``limit`` records are requested, so the list may be
        incomplete; an error is logged when the limit is reached.
    """
    limit = 1000
    children = species.name_lookup(
        higherTaxonKey=parent_taxid,
        type="occurrence",
        datasetKey="d7dddbf4-2cf0-4f39-9b2a-bb099caae36c",  # Look in GBIF Backbone only
        rank=children_rank.upper(),
        habitat=habitat,
        limit=limit,
    )
    records = children["results"]

    if len(records) >= limit:
        # The message previously interpolated an undefined name (`taxid`),
        # which raised NameError exactly when this warning was needed.
        logger.error(
            f"Number of results for {parent_taxid} exceed the limit of 1000 records. Results may be incomplete."
        )

    # .get() so a record without a taxonomicStatus is skipped, not fatal.
    return [
        child for child in records
        if child.get("taxonomicStatus") == "ACCEPTED"
    ]
def gbif_name_resolve(self):
    """Resolve every entry of ``self.names`` with a single-result name lookup.

    For each name that yields a match, the match's ``key`` is stored on the
    name as ``usage_key`` and a ``Taxonomy`` record built from the match is
    appended to ``self.taxonomy_results``. Names without a match are reported
    on stdout and skipped.

    :return: tuple ``(self.taxonomy_results, self.names)``
    """
    for name in self.names:
        # Get gbif-resolved name; only the first hit is requested.
        result = species.name_lookup(q=name.label, limit=1)
        hits = result['results']

        # Skip names for which the lookup returned nothing.
        if not hits:
            print(f"No match for: {name.label}")
            continue

        best = hits[0]
        usage_key = best['key']

        # Update Name with usageKey
        name.usage_key = usage_key

        # Create records for Taxonomy table; fields absent from the match
        # are stored as None.
        record = Taxonomy(
            usageKey=usage_key,
            scientificName=best.get('scientificName'),
            canonicalName=best.get('canonicalName'),
            rank=best.get('rank'),
            status=best.get('status'),
            kingdom=best.get('kingdom'),
            phylum=best.get('phylum'),
            order=best.get('order'),
            family=best.get('family'),
            species=best.get('species'),
            genus=best.get('genus'),
            kingdomKey=best.get('kingdomKey'),
            phylumKey=best.get('phylumKey'),
            classKey=best.get('classKey'),
            orderKey=best.get('orderKey'),
            familyKey=best.get('familyKey'),
            genusKey=best.get('genusKey'),
            speciesKey=best.get('speciesKey'),
            class_name=best.get('class'),
            rundate=date.today(),
        )
        self.taxonomy_results.append(record)

    return self.taxonomy_results, self.names
def find_species(original_species_name):
    """
    Look up a species name with a GBIF lookup query and return the first
    usable match.

    :param original_species_name: the name of the species we want to find
    :return: the first result dict that contains a ``nubKey`` or
        ``speciesKey`` entry and whose ``taxonomicStatus`` is ``ACCEPTED``
        or ``SYNONYM``; ``None`` if no result qualifies or the request
        raises ``HTTPError``
    """
    print('Find species : %s' % original_species_name)

    try:
        response = species.name_lookup(q=original_species_name, limit=3)
    except HTTPError:
        print('Species not found')
        return None

    if 'results' not in response:
        return None

    for candidate in response['results']:
        has_key = 'nubKey' in candidate or 'speciesKey' in candidate
        if not (has_key and 'taxonomicStatus' in candidate):
            continue
        if candidate['taxonomicStatus'] in ('ACCEPTED', 'SYNONYM'):
            return candidate

    return None
def gbif_id_from_gbif_api(botanical_name: str, ipni_id: str) -> Optional[int]:
    """Return the GBIF taxon id for an IPNI record, or None if not found.

    The GBIF API does not allow searching by another database's taxonId.
    Therefore we search by botanical name within the IPNI dataset and compare
    the (external) ``taxonID`` of each hit with ``ipni_id``. The ``nubKey`` of
    the first matching hit is returned.
    """
    logger.info(
        f'Searching IPNI Dataset at GBIF for {botanical_name} to get GBIF ID.')

    lookup = species.name_lookup(q=botanical_name, datasetKey=IPNI_DATASET_KEY)
    hits = lookup.get('results')
    if not hits:
        logger.info("No results on IPNI Dataset at GBIF.")
        return None

    matching = [hit for hit in hits if hit.get('taxonID') == ipni_id]
    if not matching:
        logger.info("No results on IPNI Dataset at GBIF matching IPNI ID.")
        return None

    # nub is the name of the internal gbif database
    gbif_id = matching[0].get('nubKey')
    if not gbif_id:
        # Normalise a missing or falsy key to None.
        gbif_id = None
    logger.info(f"Found GBIF ID in IPNI Dataset at GBIF: {gbif_id}.")
    return gbif_id
def test_name_lookup_rank():
    """species.name_lookup - rank parameter

    Looks up "Helianthus annuus" restricted to ``rank="species"`` and checks
    that ten results come back and that every one of them has rank
    ``SPECIES``.
    """
    res = species.name_lookup("Helianthus annuus", rank="species", limit=10)
    assert dict == res.__class__
    assert 10 == len(res["results"])
    # Compare the whole set of ranks: picking element [0] of a set only
    # checks one arbitrary member, so mixed ranks could pass unnoticed.
    assert {"SPECIES"} == {x["rank"] for x in res["results"]}
def test_name_lookup_paging():
    """species.name_lookup - paging

    Requests a single record (``limit=1``) for "mammalia" and checks that
    exactly one result is returned in the usual six-entry response dict.
    """
    response = species.name_lookup(q="mammalia", limit=1)

    assert response.__class__ == dict
    assert len(response) == 6
    assert len(response["results"]) == 1
def find_species(original_species_name, rank=None, returns_all=False, **classifier):
    """
    Find species from gbif with lookup query.

    Up to 50 results are requested. Unless ``returns_all`` is set, the
    results are filtered by ``classifier`` and the best candidate is picked
    by taxonomic status (ACCEPTED first, then SYNONYM, then anything else),
    preferring the lowest ``key`` within each status.

    :param original_species_name: the name of species we want to find
    :param rank: taxonomy rank, passed to the lookup query
    :param returns_all: if true, return the raw list of results unfiltered
    :param classifier: rank classifier, keyword arguments mapping a result
        field to an expected value (compared case-insensitively);
        ``class_name`` is looked up under the result field ``class``
    :return: the list of results when ``returns_all`` is true; otherwise a
        single result dict, or ``None`` when nothing qualifies or the
        request raises ``HTTPError``
    """
    print('Find species : %s' % original_species_name)
    try:
        response = species.name_lookup(q=original_species_name, limit=50, rank=rank)
        # Best candidate seen so far for each status category.
        accepted_data = None
        synonym_data = None
        other_data = None
        if 'results' in response:
            results = response['results']
            if returns_all:
                return results
            for result in results:
                if classifier:
                    classifier_found = True
                    for key, value in classifier.items():
                        # Classifiers with a falsy value are ignored.
                        if value:
                            # NOTE(review): the flag is reset for every
                            # truthy classifier, so the final value reflects
                            # only the last one processed -- confirm whether
                            # "all must match" was intended.
                            classifier_found = False
                            if key == 'class_name':
                                key = 'class'
                            if key not in result:
                                continue
                            if value.lower() == result[key].lower():
                                classifier_found = True
                    if not classifier_found:
                        continue
                # Rebinds the `rank` parameter; the lookup above has already
                # been issued, so the query itself is not affected.
                rank = result.get('rank', '')
                # RANK_KEYS is defined outside this function; ranks not in it
                # fall back to the generic 'key' field.
                if rank.lower() in RANK_KEYS:
                    rank_key = rank.lower() + 'Key'
                else:
                    rank_key = 'key'
                key_found = ('nubKey' in result or rank_key in result)
                if key_found and 'taxonomicStatus' in result:
                    if result['taxonomicStatus'] == 'ACCEPTED':
                        # Keep the accepted result with the lowest key.
                        if accepted_data:
                            if result['key'] < accepted_data['key']:
                                accepted_data = result
                        else:
                            accepted_data = result
                    # NOTE(review): this is `if`, not `elif`, so ACCEPTED
                    # results also reach the `else` branch below and may be
                    # stored in other_data. The return value is unaffected
                    # because accepted_data is returned first.
                    if result['taxonomicStatus'] == 'SYNONYM':
                        # Keep the synonym result with the lowest key.
                        if synonym_data:
                            if result['key'] < synonym_data['key']:
                                synonym_data = result
                        else:
                            synonym_data = result
                    else:
                        # Keep the non-synonym result with the lowest key.
                        if other_data:
                            if result['key'] < other_data['key']:
                                other_data = result
                        else:
                            other_data = result
        # Preference order: accepted, then synonym, then whatever is left
        # (which may be None).
        if accepted_data:
            return accepted_data
        if synonym_data:
            return synonym_data
        return other_data
    except HTTPError:
        print('Species not found')
    return None
#!/usr/bin/env python
# coding: utf-8

import json

import pandas as pd
from pygbif import species

# Candidate search terms; only the second one is queried below.
termo = ["Handroanthus", "Ursus"]

# Look up the selected term, requesting up to 2000 records.
espec = species.name_lookup(q=termo[1], limit=2000)
lst = espec['results']

# Tabulate the results and take a column subset.
# NOTE(review): new_df is not used again in the visible script.
df = pd.DataFrame(lst)
new_df = df[[
    'taxonKey',
    'scientificName',
    'canonicalName',
    'taxonomicStatus',
    'rank',
    'genus',
]]

# Rebuild the frame and group it by key, name, rank and status.
df = pd.DataFrame(espec['results'])
grupo1 = df.groupby(['key', 'scientificName', 'rank', 'taxonomicStatus'])

# Print each group key (a tuple of the four grouping columns).
for k in grupo1.groups:
    print(k)