コード例 #1
0
def test_name_lookup():
    """species.name_lookup - basic test"""
    response = species.name_lookup(q="mammalia")

    # The lookup must hand back a plain dict with six top-level entries.
    assert response.__class__ == dict
    assert len(response) == 6

    # No explicit limit is passed here; a full page of 100 records is expected.
    records = response["results"]
    assert len(records) == 100

    # The first hit should be the class itself.
    first_hit = records[0]
    assert first_hit["canonicalName"] == "Mammalia"
コード例 #2
0
def find_species(original_species_name, rank=None):
    """
    Look up a name on GBIF and return the first usable match.

    At most 10 results are requested. A result is usable when it carries
    either a 'nubKey' or a '<rank>Key' entry (built from the result's own
    rank) and its 'taxonomicStatus' is ACCEPTED or SYNONYM.

    :param original_species_name: the name of species we want to find
    :param rank: taxonomy rank forwarded to the lookup query
    :return: the first usable result, or None when there is none or the
        lookup raised HTTPError
    """
    print('Find species : %s' % original_species_name)
    try:
        response = species.name_lookup(
            q=original_species_name, limit=10, rank=rank)
        if 'results' not in response:
            return None

        for candidate in response['results']:
            # The key name depends on the rank reported by the result itself.
            rank_key = candidate.get('rank', '').lower() + 'Key'
            has_key = 'nubKey' in candidate or rank_key in candidate
            if not has_key or 'taxonomicStatus' not in candidate:
                continue
            if candidate['taxonomicStatus'] in ('ACCEPTED', 'SYNONYM'):
                return candidate
    except HTTPError:
        print('Species not found')

    return None
コード例 #3
0
def test_name_lookup_faceting():
    """species.name_lookup - faceting"""
    response = species.name_lookup(facet="status", limit=0)

    assert response.__class__ == dict
    assert len(response) == 6

    # limit=0 suppresses the records; only the facet summary is expected.
    assert len(response["results"]) == 0

    facets = response["facets"]
    assert len(facets) == 1
    assert len(facets[0]) == 2
コード例 #4
0
    def parse(name, target_level, taxo_dict):
        """Fill ``taxo_dict`` with the most common GBIF value for each rank.

        Queries ``species.name_lookup`` for ``name`` and, for every key of
        ``taxo_dict`` up to and including ``target_level``, stores the value
        that occurs most often among the returned records.

        :param name: name to search for
        :param target_level: most specific rank to fill; must be a key of
            ``taxo_dict`` when results are returned (otherwise ``list.index``
            raises ValueError)
        :param taxo_dict: mapping of rank name to value, updated in place;
            key order is taken as the order from broadest to most specific
        :return: the same ``taxo_dict`` object
        """
        # Assumption is that self.taxo_dict has relevant keys and Nones as values
        taxa = list(taxo_dict.keys())

        # Rank messes with species-level lookups, so only use for higher-level searches
        if target_level == 'Species':
            logging.info(
                f'Calling pygbif.species.namelookup with name "{name}"')
            data = species.name_lookup(q=name, limit=1000)
        else:
            logging.info(
                f'Calling pygbif.species.namelookup with name "{name}" and rank "{target_level}"'
            )
            data = species.name_lookup(q=name, rank=target_level, limit=1000)

        results = data['results']
        logging.debug(f'PyGBIF returned {len(results)} results')

        if not results:  # Nothing came back from pygbif; leave taxo_dict as is
            return taxo_dict

        missing_identifier = "N/A"
        avc = AttributeValueCounter(results, missing=missing_identifier)

        # slice to exclude levels more specific than the target
        relevant_levels = taxa[:taxa.index(target_level) + 1]

        for level in relevant_levels:
            attribute = level.lower()
            logging.debug(f"Attribute Value Counts for {level}: " + "\n" +
                          avc.visual_summary(attribute))
            # presumably a dict-like value -> count mapping; verify against
            # AttributeValueCounter
            rankings = avc[attribute]

            # Remove any N/A's from the rankings. A default is supplied so a
            # level where every record had a value does not raise KeyError.
            rankings.pop(missing_identifier, None)

            if not rankings:
                # Only missing values were seen: max() would raise ValueError,
                # so leave this level untouched.
                logging.debug(f'No usable values for "{level}"\n')
                continue

            top_result = max(rankings, key=rankings.get)
            logging.debug(f'Top result for "{level}" is "{top_result}"\n')
            taxo_dict[level] = top_result
            # TODO Add some aliases so that 'Metazoa' remaps to 'Animalia' and so on

        return taxo_dict
コード例 #5
0
 def get_children(self, parent_taxid, children_rank, habitat=None):
     """Return the accepted child taxa of ``parent_taxid`` at a given rank.

     :param parent_taxid: GBIF key of the parent taxon (sent as
         ``higherTaxonKey``)
     :param children_rank: rank of the wanted children; upper-cased before
         being sent
     :param habitat: optional habitat filter forwarded to the lookup
     :return: list of result records whose ``taxonomicStatus`` is
         ``"ACCEPTED"``; may be incomplete when the result limit is reached
     """
     limit = 1000
     children = species.name_lookup(
         higherTaxonKey=parent_taxid,
         type="occurrence",
         datasetKey="d7dddbf4-2cf0-4f39-9b2a-bb099caae36c",  # Look in GBIF Backbone only
         rank=children_rank.upper(),
         habitat=habitat,
         limit=limit,
     )
     records = children["results"]
     if len(records) >= limit:
         # A full page means more records may exist that were not fetched.
         logger.error(
             f"Number of results for {parent_taxid} exceed the limit of {limit} records. Results may be incomplete."
         )
     # .get() tolerates records that carry no taxonomicStatus at all.
     return [
         child for child in records
         if child.get("taxonomicStatus") == "ACCEPTED"
     ]
コード例 #6
0
    def gbif_name_resolve(self):
        """Resolve every entry of ``self.names`` against GBIF.

        For each name the first ``species.name_lookup`` hit (``limit=1``) is
        used: its ``key`` is stored on the name as ``usage_key`` and a
        ``Taxonomy`` record built from the hit is appended to
        ``self.taxonomy_results``. Names without a hit are reported on
        stdout and skipped.

        :return: tuple ``(self.taxonomy_results, self.names)``
        """
        # Fields copied verbatim from the hit; absent ones become None.
        copied_fields = (
            'scientificName', 'canonicalName', 'rank', 'status',
            'kingdom', 'phylum', 'order', 'family', 'species', 'genus',
            'kingdomKey', 'phylumKey', 'classKey', 'orderKey',
            'familyKey', 'genusKey', 'speciesKey',
        )

        for name in self.names:
            # Get gbif-resolved name
            lookup = species.name_lookup(q=name.label, limit=1)
            hits = lookup['results']

            # Ignore any names which don't find a match
            if not hits:
                print(f"No match for: {name.label}")
                continue

            top_hit = hits[0]

            # Update Name with usageKey
            name.usage_key = top_hit['key']

            # Create records for Taxonomy table
            record = Taxonomy(
                usageKey=top_hit['key'],
                **{field: top_hit.get(field) for field in copied_fields},
                # 'class' is a reserved word, hence the differently named argument
                class_name=top_hit.get('class'),
                rundate=date.today())
            self.taxonomy_results.append(record)

        return self.taxonomy_results, self.names
コード例 #7
0
ファイル: gbif.py プロジェクト: Christiaanvdm/django-bims
def find_species(original_species_name):
    """
    Look up a name on GBIF and return the first usable match.

    At most 3 results are requested. A result is usable when it carries a
    'nubKey' or 'speciesKey' entry and its 'taxonomicStatus' is ACCEPTED or
    SYNONYM.

    :param original_species_name: the name of species we want to find
    :return: the first usable result, or None when there is none or the
        lookup raised HTTPError
    """
    print('Find species : %s' % original_species_name)
    try:
        response = species.name_lookup(q=original_species_name, limit=3)
        if 'results' not in response:
            return None

        for candidate in response['results']:
            has_key = 'nubKey' in candidate or 'speciesKey' in candidate
            if not has_key or 'taxonomicStatus' not in candidate:
                continue
            if candidate['taxonomicStatus'] in ('ACCEPTED', 'SYNONYM'):
                return candidate
    except HTTPError:
        print('Species not found')

    return None
コード例 #8
0
def gbif_id_from_gbif_api(botanical_name: str, ipni_id: str) -> Optional[int]:
    """Resolve a GBIF taxon id through the IPNI dataset hosted at GBIF.

    The GBIF API does not allow searching by another database's taxon id.
    Instead the IPNI dataset is searched by botanical name and the hits are
    filtered on their (external) ``taxonID``; the ``nubKey`` of the first
    matching hit is returned.

    Returns None when the search yields nothing, when no hit matches
    ``ipni_id``, or when the matching hit has no truthy ``nubKey``.
    """
    logger.info(
        f'Searching IPNI Dataset at GBIF for {botanical_name} to get GBIF ID.')
    lookup = species.name_lookup(q=botanical_name, datasetKey=IPNI_DATASET_KEY)

    hits = lookup.get('results')
    if not hits:
        logger.info("No results on IPNI Dataset at GBIF.")
        return None

    matching_hits = [hit for hit in hits if hit.get('taxonID') == ipni_id]
    if not matching_hits:
        logger.info("No results on IPNI Dataset at GBIF matching IPNI ID.")
        return None

    # nub is the name of the internal gbif database
    first_match = matching_hits[0]
    gbif_id = first_match.get('nubKey') or None
    logger.info(f"Found GBIF ID in IPNI Dataset at GBIF: {gbif_id}.")
    return gbif_id
コード例 #9
0
def test_name_lookup_rank():
    """species.name_lookup - rank parameter"""
    res = species.name_lookup("Helianthus annuus", rank="species", limit=10)
    assert dict == res.__class__
    assert 10 == len(res["results"])
    # Compare the whole set of ranks: picking element [0] of a set checks one
    # arbitrary rank, so a mixed-rank response could pass or fail depending
    # on hash ordering.
    assert {"SPECIES"} == {x["rank"] for x in res["results"]}
コード例 #10
0
def test_name_lookup_paging():
    """species.name_lookup - paging"""
    response = species.name_lookup(q="mammalia", limit=1)

    assert response.__class__ == dict
    assert len(response) == 6

    # limit=1 must be honoured: exactly one record comes back.
    records = response["results"]
    assert len(records) == 1
コード例 #11
0
def find_species(original_species_name,
                 rank=None,
                 returns_all=False,
                 **classifier):
    """
    Find the best GBIF match for a name with a lookup query.

    Up to 50 results are requested. Results that pass the classifier filter
    and carry a usable key are grouped by 'taxonomicStatus'; within each
    group the result with the lowest 'key' wins. An ACCEPTED result is
    preferred over a SYNONYM one, which is preferred over any other status.

    :param original_species_name: the name of species we want to find
    :param rank: taxonomy rank forwarded to the lookup query
    :param returns_all: when true, return the raw list of results unfiltered
    :param classifier: rank classifier, e.g. ``kingdom='Animalia'``;
        ``class_name`` is compared against the result's 'class' entry
    :return: the raw result list when ``returns_all`` is true; otherwise the
        best matching result, or None when nothing matched or the lookup
        raised HTTPError
    """
    print('Find species : %s' % original_species_name)
    # Only the remote call can raise HTTPError, so only it is guarded.
    try:
        response = species.name_lookup(q=original_species_name,
                                       limit=50,
                                       rank=rank)
    except HTTPError:
        print('Species not found')
        return None

    if 'results' not in response:
        return None
    results = response['results']
    if returns_all:
        return results

    accepted_data = None
    synonym_data = None
    other_data = None
    for result in results:
        if classifier:
            # NOTE(review): the flag is reset for every non-empty classifier,
            # so only the last non-empty classifier decides the outcome.
            # Kept as is; confirm whether "all must match" was intended.
            classifier_found = True
            for key, value in classifier.items():
                if not value:
                    continue
                classifier_found = False
                if key == 'class_name':
                    key = 'class'
                if key not in result:
                    continue
                if value.lower() == result[key].lower():
                    classifier_found = True
            if not classifier_found:
                continue

        # The key name depends on the rank reported by the result itself.
        result_rank = result.get('rank', '').lower()
        rank_key = result_rank + 'Key' if result_rank in RANK_KEYS else 'key'
        if not ('nubKey' in result or rank_key in result):
            continue
        if 'taxonomicStatus' not in result:
            continue

        # Keep, per status group, the result with the lowest 'key'.
        status = result['taxonomicStatus']
        if status == 'ACCEPTED':
            if not accepted_data or result['key'] < accepted_data['key']:
                accepted_data = result
        elif status == 'SYNONYM':
            if not synonym_data or result['key'] < synonym_data['key']:
                synonym_data = result
        else:
            if not other_data or result['key'] < other_data['key']:
                other_data = result

    return accepted_data or synonym_data or other_data
コード例 #12
0
#!/usr/bin/env python
# coding: utf-8
import json
import pandas as pd
from pygbif import species


# Exploratory script: fetch GBIF name-lookup results for one genus and print
# the distinct (key, scientificName, rank, taxonomicStatus) combinations.

# Candidate search terms; only the second one ("Ursus") is queried below.
termo = ["Handroanthus", "Ursus"]
# NOTE(review): limit=2000 may exceed the maximum page size the GBIF API
# accepts — confirm against the pygbif documentation.
espec = species.name_lookup(q=termo[1], limit=2000)
lst = espec['results']
df = pd.DataFrame(lst)

# Column subset of the results; not used again in the visible part of the script.
# NOTE(review): this selection raises KeyError if any listed column is absent
# from the results — confirm that 'taxonKey' is actually returned.
new_df = df[['taxonKey', 'scientificName', 'canonicalName', 'taxonomicStatus', 'rank', 'genus']]
# Rebuilds the same frame as above from the same results list.
df = pd.DataFrame(espec['results'])

# Group rows by identifying columns; each group key is a 4-tuple.
grupo1 = df.groupby([ 'key', 'scientificName', 'rank','taxonomicStatus' ])

# Print every group key; the row labels of each group (v) are not used.
for k, v in grupo1.groups.items():
    print(k)