def name_usage_fetch(x, key, shortname, uuid, args, **kwargs):
    """Resolve the species-usage endpoint for ``x`` and perform the GET.

    :param x: [str] which resource to fetch: ``all``, ``root``, or one of
        the per-usage sub-resources (``synonyms``, ``media``, ...)
    :param key: [int] usage key; required unless ``x == "all"``
    :param shortname: [str] checklist short name, used when ``x == "root"``
    :param uuid: [str] dataset uuid, used when ``x == "root"``
    :param args: [dict] query parameters forwarded with the request
    :return: result of ``gbif_GET`` on the resolved URL
    """
    if x != "all" and key is None:
        raise TypeError(
            "You must specify `key` if `data` does not equal `all`")
    # Sub-resources that hang off an individual species usage record.
    subresources = (
        "verbatim", "name", "parents", "children", "related", "synonyms",
        "descriptions", "distributions", "media", "references",
        "speciesProfiles", "vernacularNames", "typeSpecimens",
    )
    if x == "all" and key is None:
        url = gbif_baseurl + "species"
    elif x == "all":
        url = gbif_baseurl + "species/" + str(key)
    elif x in subresources:
        url = gbif_baseurl + "species/%s/%s" % (str(key), x)
    elif x == "root":
        url = gbif_baseurl + "species/%s/%s" % (uuid, shortname)
    # NOTE(review): any other value of ``x`` leaves ``url`` unbound and the
    # call below raises NameError — same behavior as the original nesting.
    res = gbif_GET(url, args, **kwargs)
    return res
def count_schema(**kwargs):
    """List the supported metrics by the service

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_schema()
    """
    # Schema endpoint takes no query parameters.
    return gbif_GET(gbif_baseurl + "occurrence/count/schema", {}, **kwargs)
def count_basisofrecord(**kwargs):
    """Lists occurrence counts by basis of record.

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_basisofrecord()
    """
    # No filtering supported by this endpoint; just GET it.
    return gbif_GET(gbif_baseurl + "occurrence/counts/basisOfRecord", {}, **kwargs)
def getdata(x, uuid, args, **kwargs):
    """Resolve the network endpoint for ``x``/``uuid``, GET it, and wrap
    the response in the meta/data dict used by the public callers.
    """
    if x != "all" and uuid is None:
        stop('You must specify a uuid if data does not equal "all"')
    # Guard clauses above guarantee uuid is set for any non-"all" x.
    if uuid is None:
        url = gbif_baseurl + "network"
    elif x == "all":
        url = gbif_baseurl + "network/" + uuid
    else:
        url = gbif_baseurl + "network/" + uuid + "/" + x
    res = gbif_GET(url, args, **kwargs)
    return {"meta": get_meta(res), "data": parse_results(res, uuid)}
def count(taxonKey=None, basisOfRecord=None, country=None,
          isGeoreferenced=None, datasetKey=None, publishingCountry=None,
          typeStatus=None, issue=None, year=None, **kwargs):
    """
    Returns occurrence counts for a predefined set of dimensions

    :param taxonKey: [int] Taxon key to count occurrences for
    :param basisOfRecord: [str] Basis of record filter
    :param country: [str] Two letter country code filter
    :param isGeoreferenced: [bool] Restrict to georeferenced records
    :param datasetKey: [str] Dataset key (a uuid)
    :param publishingCountry: [str] Two letter publishing country code
    :param typeStatus: [str] Type status filter
    :param issue: [str] Issue filter
    :param year: [int] Year filter

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count(taxonKey = 3329049)
        occurrences.count(country = 'CA')
        occurrences.count(isGeoreferenced = True)
        occurrences.count(basisOfRecord = 'OBSERVATION')
    """
    # The service expects booleans serialized as lowercase strings.
    payload = {
        "taxonKey": taxonKey,
        "basisOfRecord": basisOfRecord,
        "country": country,
        "isGeoreferenced": bool2str(isGeoreferenced),
        "datasetKey": datasetKey,
        "publishingCountry": publishingCountry,
        "typeStatus": typeStatus,
        "issue": issue,
        "year": year,
    }
    return gbif_GET(gbif_baseurl + "occurrence/count", payload, **kwargs)
def count_countries(publishingCountry, **kwargs):
    """Lists occurrence counts for all countries covered by the data
    published by the given country

    :param publishingCountry: [str] A two letter country code

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_countries(publishingCountry = "DE")
    """
    params = {"publishingCountry": publishingCountry}
    return gbif_GET(gbif_baseurl + "occurrence/counts/countries", params, **kwargs)
def count_publishingcountries(country, **kwargs):
    """Lists occurrence counts for all countries that publish data about
    the given country

    :param country: [str] A country, two letter code

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_publishingcountries(country = "DE")
    """
    params = {"country": country}
    return gbif_GET(
        gbif_baseurl + "occurrence/counts/publishingCountries", params, **kwargs)
def count_year(year, **kwargs):
    """Lists occurrence counts by year

    :param year: [int] year range, e.g., ``1990,2000``. Does not support
        ranges like ``asterisk,2010``

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_year(year = '1990,2000')
    """
    return gbif_GET(gbif_baseurl + "occurrence/counts/year", {"year": year}, **kwargs)
def count_datasets(taxonKey=None, country=None, **kwargs):
    """Lists occurrence counts for datasets that cover a given taxon or country

    :param taxonKey: [int] Taxon key
    :param country: [str] A country, two letter code

    :return: dict

    Usage::

        from pygbif import occurrences
        occurrences.count_datasets(country = "DE")
    """
    params = {"taxonKey": taxonKey, "country": country}
    return gbif_GET(gbif_baseurl + "occurrence/counts/datasets", params, **kwargs)
def get_fragment(key, **kwargs):
    """Get a single occurrence fragment in its raw form (xml or json)

    :param key: [int] A GBIF occurrence key

    :return: A dictionary, of results

    Usage::

        from pygbif import occurrences
        occurrences.get_fragment(key = 1052909293)
    """
    url = "%soccurrence/%s/fragment" % (gbif_baseurl, key)
    return gbif_GET(url, {}, **kwargs)
def get(key, **kwargs):
    """Gets details for a single, interpreted occurrence

    :param key: [int] A GBIF occurrence key

    :return: A dictionary, of results

    Usage::

        from pygbif import occurrences
        occurrences.get(key = 1258202889)
    """
    url = "%soccurrence/%s" % (gbif_baseurl, key)
    return gbif_GET(url, {}, **kwargs)
def get_verbatim(key, **kwargs):
    """Gets a verbatim occurrence record without any interpretation

    :param key: [int] A GBIF occurrence key

    :return: A dictionary, of results

    Usage::

        from pygbif import occurrences
        occurrences.get_verbatim(key = 1258202889)
    """
    url = "%soccurrence/%s/verbatim" % (gbif_baseurl, key)
    return gbif_GET(url, {}, **kwargs)
def getdata(x, uuid, args, **kwargs):
    """Resolve the installation endpoint for ``x``/``uuid``, GET it, and
    wrap the response in the meta/data dict used by the public callers.
    """
    # These values of x address collection endpoints, so no uuid is needed.
    no_uuid_needed = ("all", "deleted", "nonPublishing")
    if x not in no_uuid_needed and uuid is None:
        stop(
            "You must specify a uuid if data does not equal all and data does not equal one of deleted or nonPublishing"
        )
    if uuid is None:
        url = gbif_baseurl + "installation" if x == "all" \
            else gbif_baseurl + "installation/" + x
    elif x == "all":
        url = gbif_baseurl + "installation/" + uuid
    else:
        url = gbif_baseurl + "installation/" + uuid + "/" + x
    res = gbif_GET(url, args, **kwargs)
    return {"meta": get_meta(res), "data": parse_results(res, uuid)}
def name_suggest(q=None, datasetKey=None, rank=None, limit=100, offset=None, **kwargs):
    """
    A quick and simple autocomplete service that returns up to 20 name
    usages by doing prefix matching against the scientific name. Results
    are ordered by relevance.

    :param q: [str] Simple search parameter. The value for this parameter
        can be a simple word or a phrase. Wildcards can be added to the
        simple word parameters only, e.g. ``q=*puma*`` (Required)
    :param datasetKey: [str] Filters by the checklist dataset key (a uuid)
    :param rank: [str] A taxonomic rank. One of ``class``, ``cultivar``,
        ``cultivar_group``, ``domain``, ``family``, ``form``, ``genus``,
        ``informal``, ``infrageneric_name``, ``infraorder``,
        ``infraspecific_name``, ``infrasubspecific_name``, ``kingdom``,
        ``order``, ``phylum``, ``section``, ``series``, ``species``,
        ``strain``, ``subclass``, ``subfamily``, ``subform``, ``subgenus``,
        ``subkingdom``, ``suborder``, ``subphylum``, ``subsection``,
        ``subseries``, ``subspecies``, ``subtribe``, ``subvariety``,
        ``superclass``, ``superfamily``, ``superorder``, ``superphylum``,
        ``suprageneric_name``, ``tribe``, ``unranked``, or ``variety``.
    :param limit: [fixnum] Number of records to return. Maximum: ``1000``. (optional)
    :param offset: [fixnum] Record number to start at. (optional)

    :return: A dictionary

    References: http://www.gbif.org/developer/species#searching

    Usage::

        from pygbif import species
        species.name_suggest(q='Puma concolor')
        species.name_suggest(q='Puma', rank="genus")
        species.name_suggest(q='Puma', limit=2)
    """
    url = gbif_baseurl + "species/suggest"
    # BUG FIX: datasetKey was accepted and documented but never sent to the
    # API, so the checklist filter was silently ignored.
    args = {
        "q": q,
        "datasetKey": datasetKey,
        "rank": rank,
        "offset": offset,
        "limit": limit,
    }
    return gbif_GET(url, args, **kwargs)
def getdata(x, uuid, args, isocode=None, **kwargs):
    """Resolve the node endpoint for ``x``/``uuid``, GET it, and wrap the
    response in the meta/data dict used by the public callers.

    :param x: [str] which resource to fetch (``all``, ``country``, or a
        per-node sub-resource)
    :param uuid: [str] node uuid; required unless ``x == "all"``
    :param args: [dict] query parameters forwarded with the request
    :param isocode: [str] ISO country code, used with ``x == "country"``.
        BUG FIX: the original body referenced ``isocode`` without it being
        a parameter or local, so the ``x == "country"`` path raised
        NameError; it is now an explicit keyword argument (default None
        keeps existing callers working).
    """
    if x != "all" and uuid is None:
        stop('You must specify a uuid if data does not equal "all"')
    if uuid is None:
        if x == "all":
            url = gbif_baseurl + "node"
        elif isocode is not None and x == "country":
            url = gbif_baseurl + "node/country/" + isocode
        else:
            url = gbif_baseurl + "node/" + x
    elif x == "all":
        url = gbif_baseurl + "node/" + uuid
    else:
        url = gbif_baseurl + "node/" + uuid + "/" + x
    res = gbif_GET(url, args, **kwargs)
    return {"meta": get_meta(res), "data": parse_results(res, uuid)}
def getdata(x, uuid, args, **kwargs):
    """Resolve the organization endpoint for ``x``/``uuid``, GET it, and
    wrap the response in the meta/data dict used by the public callers.
    """
    # Collection-level endpoints that can be hit without a uuid.
    nouuid = ["all", "deleted", "pending", "nonPublishing"]
    if x not in nouuid and uuid is None:
        stop(
            'You must specify a uuid if data does not equal "all" and data does not equal one of '
            + ", ".join(nouuid))
    if uuid is None:
        url = gbif_baseurl + ("organization" if x == "all" else "organization/" + x)
    elif x == "all":
        url = gbif_baseurl + "organization/" + uuid
    else:
        url = gbif_baseurl + "organization/" + uuid + "/" + x
    res = gbif_GET(url, args, **kwargs)
    return {"meta": get_meta(res), "data": parse_results(res, uuid)}
def datasets_fetch(x, uuid, args, metadata_id=None, **kwargs):
    """Resolve the dataset endpoint for ``x``/``uuid`` and perform the GET.

    :param x: [str] which resource to fetch (``all``, ``metadata``, one of
        the collection endpoints, or a per-dataset sub-resource)
    :param uuid: [str] dataset uuid; required unless ``x`` is one of the
        collection endpoints below
    :param args: [dict] query parameters forwarded with the request
    :param metadata_id: [str] metadata document id, used with
        ``x == "metadata"``. BUG FIX: the original body tested and
        concatenated the *builtin* ``id`` function (always non-None), so
        the metadata-document path raised TypeError; it is now an explicit
        keyword argument (default None keeps existing callers working and
        avoids shadowing the builtin).
    :return: result of ``gbif_GET`` on the resolved URL
    """
    no_uuid_needed = ["all", "deleted", "duplicate", "subDataset", "withNoEndpoint"]
    if x not in no_uuid_needed and uuid is None:
        raise TypeError(
            "You must specify a uuid if data does not equal all and data does not equal of deleted, duplicate, subDataset, or withNoEndpoint"
        )
    if uuid is None:
        if x == "all":
            url = gbif_baseurl + "dataset"
        elif metadata_id is not None and x == "metadata":
            url = gbif_baseurl + "dataset/metadata/" + metadata_id + "/document"
        else:
            url = gbif_baseurl + "dataset/" + x
    elif x == "all":
        url = gbif_baseurl + "dataset/" + uuid
    else:
        url = gbif_baseurl + "dataset/" + uuid + "/" + x
    res = gbif_GET(url, args, **kwargs)
    return res
def name_lookup(q=None, rank=None, higherTaxonKey=None, status=None,
                isExtinct=None, habitat=None, nameType=None, datasetKey=None,
                nomenclaturalStatus=None, limit=100, offset=None, facet=False,
                facetMincount=None, facetMultiselect=None, type=None, hl=False,
                verbose=False, **kwargs):
    """
    Lookup names in all taxonomies in GBIF. This service uses fuzzy lookup
    so that you can put in partial names and you should get back those
    things that match. See examples below.

    :param q: [str] Query term(s) for full text search (optional)
    :param rank: [str] ``CLASS``, ``CULTIVAR``, ``CULTIVAR_GROUP``,
        ``DOMAIN``, ``FAMILY``, ``FORM``, ``GENUS``, ``INFORMAL``,
        ``INFRAGENERIC_NAME``, ``INFRAORDER``, ``INFRASPECIFIC_NAME``,
        ``INFRASUBSPECIFIC_NAME``, ``KINGDOM``, ``ORDER``, ``PHYLUM``,
        ``SECTION``, ``SERIES``, ``SPECIES``, ``STRAIN``, ``SUBCLASS``,
        ``SUBFAMILY``, ``SUBFORM``, ``SUBGENUS``, ``SUBKINGDOM``,
        ``SUBORDER``, ``SUBPHYLUM``, ``SUBSECTION``, ``SUBSERIES``,
        ``SUBSPECIES``, ``SUBTRIBE``, ``SUBVARIETY``, ``SUPERCLASS``,
        ``SUPERFAMILY``, ``SUPERORDER``, ``SUPERPHYLUM``,
        ``SUPRAGENERIC_NAME``, ``TRIBE``, ``UNRANKED``, ``VARIETY`` (optional)
    :param verbose: [bool] If ``True`` show alternative matches considered
        which had been rejected.
    :param higherTaxonKey: [str] Filters by any of the higher Linnean rank
        keys. Note this is within the respective checklist and not searching
        nub keys across all checklists (optional)
    :param status: [str] Filters by taxonomic status, e.g. ``ACCEPTED``,
        ``DOUBTFUL``, ``SYNONYM`` and its more specific subclasses
        (``HETEROTYPIC_SYNONYM``, ``HOMOTYPIC_SYNONYM``, ``MISAPPLIED``,
        ``PROPARTE_SYNONYM``, ``DETERMINATION_SYNONYM``,
        ``INTERMEDIATE_RANK_SYNONYM``) (optional)
    :param isExtinct: [bool] Filters by extinction status (e.g.
        ``isExtinct=True``)
    :param habitat: [str] Filters by habitat. One of: ``marine``,
        ``freshwater``, or ``terrestrial`` (optional)
    :param nameType: [str] Filters by the name type, e.g. ``SCINAME``,
        ``WELLFORMED``, ``DOUBTFUL``, ``BLACKLISTED``, ``CANDIDATUS``,
        ``CULTIVAR``, ``HYBRID``, ``INFORMAL``, ``VIRUS`` (optional)
    :param datasetKey: [str] Filters by the dataset's key (a uuid) (optional)
    :param nomenclaturalStatus: [str] Not yet implemented, but will
        eventually allow for filtering by a nomenclatural status enum
    :param limit: [fixnum] Number of records to return. Maximum: ``1000``. (optional)
    :param offset: [fixnum] Record number to start at. (optional)
    :param facet: [str] A list of facet names used to retrieve the 100 most
        frequent values for a field. Allowed facets are: ``datasetKey``,
        ``higherTaxonKey``, ``rank``, ``status``, ``isExtinct``,
        ``habitat``, and ``nameType``. (optional)
    :param facetMincount: [str] Used in combination with the facet
        parameter. Set ``facetMincount={#}`` to exclude facets with a count
        less than {#} (optional)
    :param facetMultiselect: [bool] Used in combination with the facet
        parameter. Set ``facetMultiselect=True`` to still return counts for
        values that are not currently filtered (optional)
    :param type: [str] Type of name. One of ``occurrence``, ``checklist``,
        or ``metadata``. (optional)
    :param hl: [bool] Set ``hl=True`` to highlight terms matching the query
        when in fulltext search fields. (optional)

    :return: A dictionary

    :references: http://www.gbif.org/developer/species#searching

    Usage::

        from pygbif import species
        species.name_lookup(q='mammalia')
        species.name_lookup(q='mammalia', limit=1, offset=2)
        species.name_lookup('Helianthus annuus', rank="species", verbose=True)
        species.name_lookup(habitat = "terrestrial", limit=2)
        species.name_lookup(facet='status', limit=0, facetMincount='70000')
        species.name_lookup(q='plant', hl=True, limit=30)
        species.name_lookup(datasetKey='3f8a1297-3259-4700-91fc-acc4170b27ce')
    """
    args = {
        "q": q,
        "rank": rank,
        "higherTaxonKey": higherTaxonKey,
        "status": status,
        "isExtinct": bool2str(isExtinct),
        "habitat": habitat,
        "nameType": nameType,
        "datasetKey": datasetKey,
        "nomenclaturalStatus": nomenclaturalStatus,
        "limit": limit,
        "offset": offset,
        "facet": bn(facet),
        "facetMincount": facetMincount,
        "facetMultiselect": bool2str(facetMultiselect),
        "hl": bool2str(hl),
        "verbose": bool2str(verbose),
        "type": type,
    }
    # Any kwargs that are not requests options are extra GBIF query params;
    # underscores become dots (e.g. datasetKey_facetLimit).
    gbif_kwargs = {
        key: kwargs[key] for key in kwargs if key not in requests_argset
    }
    # BUG FIX: a dict comprehension is never None, so the original
    # `if gbif_kwargs is not None` was always true; test truthiness instead.
    if gbif_kwargs:
        xx = dict(
            zip([re.sub("_", ".", x) for x in gbif_kwargs.keys()],
                gbif_kwargs.values()))
        args.update(xx)
    kwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}
    return gbif_GET(gbif_baseurl + "species/search", args, **kwargs)
def search(taxonKey=None, repatriated=None, kingdomKey=None, phylumKey=None,
           classKey=None, orderKey=None, familyKey=None, genusKey=None,
           subgenusKey=None, scientificName=None, country=None,
           publishingCountry=None, hasCoordinate=None, typeStatus=None,
           recordNumber=None, lastInterpreted=None, continent=None,
           geometry=None, recordedBy=None, recordedByID=None,
           identifiedByID=None, basisOfRecord=None, datasetKey=None,
           eventDate=None, catalogNumber=None, year=None, month=None,
           decimalLatitude=None, decimalLongitude=None, elevation=None,
           depth=None, institutionCode=None, collectionCode=None,
           hasGeospatialIssue=None, issue=None, q=None, spellCheck=None,
           mediatype=None, limit=300, offset=0, establishmentMeans=None,
           facet=None, facetMincount=None, facetMultiselect=None, **kwargs):
    """
    Search GBIF occurrences

    :param taxonKey: [int] A GBIF occurrence identifier
    :param q: [str] Simple search parameter. The value for this parameter
        can be a simple word or a phrase.
    :param spellCheck: [bool] If ``True`` ask GBIF to check your spelling of
        the value passed to the ``search`` parameter. IMPORTANT: This only
        checks the input to the ``search`` parameter, and no others.
        Default: ``False``
    :param repatriated: [str] Searches for records whose publishing country
        is different to the country where the record was recorded in
    :param kingdomKey: [int] Kingdom classification key
    :param phylumKey: [int] Phylum classification key
    :param classKey: [int] Class classification key
    :param orderKey: [int] Order classification key
    :param familyKey: [int] Family classification key
    :param genusKey: [int] Genus classification key
    :param subgenusKey: [int] Subgenus classification key
    :param scientificName: [str] A scientific name from the GBIF backbone.
        All included and synonym taxa are included in the search.
    :param datasetKey: [str] The occurrence dataset key (a uuid)
    :param catalogNumber: [str] An identifier of any form assigned by the
        source within a physical collection or digital dataset for the record
    :param recordedBy: [str] The person who recorded the occurrence.
    :param recordedByID: [str] Identifier (e.g. ORCID) for the person who
        recorded the occurrence
    :param identifiedByID: [str] Identifier (e.g. ORCID) for the person who
        provided the taxonomic identification of the occurrence.
    :param collectionCode: [str] An identifier of any form assigned by the
        source to identify the physical collection or digital dataset
        uniquely within the text of an institution.
    :param institutionCode: [str] An identifier of any form assigned by the
        source to identify the institution the record belongs to.
    :param country: [str] The 2-letter country code (as per ISO-3166-1) of
        the country in which the occurrence was recorded.
    :param basisOfRecord: [str] Basis of record, as defined in the GBIF
        BasisOfRecord enum (e.g. ``FOSSIL_SPECIMEN``, ``HUMAN_OBSERVATION``,
        ``LITERATURE``, ``LIVING_SPECIMEN``, ``MACHINE_OBSERVATION``,
        ``OBSERVATION``, ``PRESERVED_SPECIMEN``, ``UNKNOWN``)
    :param eventDate: [date] Occurrence date in ISO 8601 format: yyyy,
        yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, smaller,larger
        (e.g., ``1990,1991``)
    :param year: [int] The 4 digit year. Supports range queries,
        smaller,larger (e.g., ``1990,1991``)
    :param month: [int] The month of the year, starting with 1 for January.
        Supports range queries, smaller,larger (e.g., ``1,2``)
    :param decimalLatitude: [float] Latitude in decimals between -90 and 90
        based on WGS 84. Supports range queries.
    :param decimalLongitude: [float] Longitude in decimals between -180 and
        180 based on WGS 84. Supports range queries.
    :param publishingCountry: [str] The 2-letter country code (as per
        ISO-3166-1) of the publishing country.
    :param elevation: [int/str] Elevation in meters above sea level.
        Supports range queries.
    :param depth: [int/str] Depth in meters relative to elevation. Supports
        range queries.
    :param geometry: [str] Searches for occurrences inside a polygon
        described in Well Known Text (WKT) format (POINT, LINESTRING,
        LINEARRING, POLYGON, or MULTIPOLYGON). Polygons must have
        counter-clockwise ordering of points.
    :param hasGeospatialIssue: [bool] Includes/excludes occurrence records
        which contain spatial issues; absence of this parameter returns any
        record with or without spatial issues.
    :param issue: [str] One or more of many possible issues with each
        occurrence record; filters results by the issue.
    :param hasCoordinate: [bool] Return only occurrence records with
        lat/long data (``True``) or all records (``False``, default).
    :param typeStatus: [str] Type status of the specimen.
    :param recordNumber: [int] Number recorded by collector of the data,
        different from GBIF record number.
    :param lastInterpreted: [date] Date the record was last modified in
        GBIF, in ISO 8601 format. Supports range queries.
    :param continent: [str] Continent. One of ``africa``, ``antarctica``,
        ``asia``, ``europe``, ``north_america``, ``oceania``, or
        ``south_america``
    :param mediatype: [str] Media type. Default is ``NULL``, so no filtering
        on mediatype. Options: ``NULL``, ``MovingImage``, ``Sound``, and
        ``StillImage``
    :param limit: [int] Number of results to return. Default: ``300``
    :param offset: [int] Record to start at. Default: ``0``
    :param facet: [str] a character vector of length 1 or greater
    :param establishmentMeans: [str] EstablishmentMeans, possible values
        include: INTRODUCED, INVASIVE, MANAGED, NATIVE, NATURALISED, UNCERTAIN
    :param facetMincount: [int] minimum number of records to be included in
        the faceting results
    :param facetMultiselect: [bool] Set to ``True`` to still return counts
        for values that are not currently filtered.  Default: ``False``

    :return: A dictionary

    Usage::

        from pygbif import occurrences
        occurrences.search(taxonKey = 3329049)
        occurrences.search(scientificName = 'Ursus americanus')
        occurrences.search(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', limit=20)
        occurrences.search(q = "kingfisher", limit=20, spellCheck = True)
        occurrences.search(geometry='POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))', limit=20)
        occurrences.search(year='1999,2000', limit=20)
        x = occurrences.search(facet = ["country", "basisOfRecord"], limit = 10)
        # per-facet paging via extra kwargs, e.g. country_facetLimit = 3
        x = occurrences.search(facet = ["country"], country_facetLimit = 3, limit = 0)
    """
    url = gbif_baseurl + "occurrence/search"
    args = {
        "taxonKey": taxonKey,
        "repatriated": repatriated,
        "kingdomKey": kingdomKey,
        "phylumKey": phylumKey,
        "classKey": classKey,
        "orderKey": orderKey,
        "familyKey": familyKey,
        "genusKey": genusKey,
        "subgenusKey": subgenusKey,
        "scientificName": scientificName,
        "country": country,
        "publishingCountry": publishingCountry,
        "hasCoordinate": bool2str(hasCoordinate),
        "typeStatus": typeStatus,
        "recordNumber": recordNumber,
        "lastInterpreted": lastInterpreted,
        "continent": continent,
        "geometry": geometry,
        "recordedBy": recordedBy,
        "recordedByID": recordedByID,
        "identifiedByID": identifiedByID,
        "basisOfRecord": basisOfRecord,
        "datasetKey": datasetKey,
        "eventDate": eventDate,
        "catalogNumber": catalogNumber,
        "year": year,
        "month": month,
        "decimalLatitude": decimalLatitude,
        "decimalLongitude": decimalLongitude,
        "elevation": elevation,
        "depth": depth,
        "institutionCode": institutionCode,
        "collectionCode": collectionCode,
        "hasGeospatialIssue": bool2str(hasGeospatialIssue),
        "issue": issue,
        "q": q,
        "spellCheck": bool2str(spellCheck),
        "mediatype": mediatype,
        "limit": limit,
        "offset": offset,
        "establishmentMeans": establishmentMeans,
        "facetMincount": facetMincount,
        "facet": facet,
        "facetMultiselect": bool2str(facetMultiselect),
    }
    # Any kwargs that are not requests options are extra GBIF query params;
    # underscores become dots (e.g. country_facetLimit -> country.facetLimit).
    gbif_kwargs = {
        key: kwargs[key] for key in kwargs if key not in requests_argset
    }
    # BUG FIX: a dict comprehension is never None, so the original
    # `if gbif_kwargs is not None` was always true; test truthiness instead.
    if gbif_kwargs:
        xx = dict(
            zip([re.sub("_", ".", x) for x in gbif_kwargs.keys()],
                gbif_kwargs.values()))
        args.update(xx)
    kwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}
    out = gbif_GET(url, args, **kwargs)
    return out
def name_backbone(name, rank=None, kingdom=None, phylum=None, clazz=None,
                  order=None, family=None, genus=None, strict=False,
                  verbose=False, offset=None, limit=100, **kwargs):
    """
    Lookup names in the GBIF backbone taxonomy.

    :param name: [str] Full scientific name potentially with authorship (required)
    :param rank: [str] The rank given as our rank enum. (optional)
    :param kingdom: [str] If provided default matching will also try to match
        against this if no direct match is found for the name alone. (optional)
    :param phylum: [str] If provided default matching will also try to match
        against this if no direct match is found for the name alone. (optional)
    :param clazz: [str] Taxonomic class (sent to the GBIF API as ``class``,
        which is a reserved word in Python). If provided default matching will
        also try to match against this if no direct match is found for the
        name alone. (optional)
    :param order: [str] If provided default matching will also try to match
        against this if no direct match is found for the name alone. (optional)
    :param family: [str] If provided default matching will also try to match
        against this if no direct match is found for the name alone. (optional)
    :param genus: [str] If provided default matching will also try to match
        against this if no direct match is found for the name alone. (optional)
    :param strict: [bool] If ``True`` it (fuzzy) matches only the given name,
        but never a taxon in the upper classification (optional)
    :param verbose: [bool] If ``True`` show alternative matches considered
        which had been rejected.
    :param offset: [int] Record to start at. Default: ``0``
    :param limit: [int] Number of results to return. Default: ``100``

    If you are looking for behavior similar to the GBIF website when you
    search for a name, `name_backbone` may be what you want. For example, a
    search for *Lantanophaga pusillidactyla* on the GBIF website and with
    `name_backbone` will give back as a first result the correct name
    *Lantanophaga pusillidactylus*.

    :return: A dict for a single taxon with many slots (with ``verbose=False``
        - default), or a dict of length two, first element for the suggested
        taxon match, and alternative name suggestions resulting from fuzzy
        matching (with ``verbose=True``). If you don't get a match GBIF gives
        back a dict of length 3 with slots synonym, confidence, and
        ``matchType='NONE'``.

    reference: https://www.gbif.org/developer/species#searching

    Usage::

        from pygbif import species
        species.name_backbone(name='Helianthus annuus', kingdom='plants')
        species.name_backbone(name='Helianthus', rank='genus', kingdom='plants')
        species.name_backbone(name='Poa', rank='genus', family='Poaceae')

        # Verbose - gives back alternatives
        species.name_backbone(name='Helianthus annuus', kingdom='plants', verbose=True)

        # Strictness
        species.name_backbone(name='Poa', kingdom='plants', verbose=True, strict=False)
        species.name_backbone(name='Helianthus annuus', kingdom='plants', verbose=True, strict=True)

        # Non-existent name
        species.name_backbone(name='Aso')

        # Multiple equal matches
        species.name_backbone(name='Oenante')
    """
    url = gbif_baseurl + "species/match"
    args = {
        "name": name,
        "rank": rank,
        "kingdom": kingdom,
        "phylum": phylum,
        # the API parameter is ``class``; ``clazz`` is used in the signature
        # only because ``class`` is a Python keyword
        "class": clazz,
        "order": order,
        "family": family,
        "genus": genus,
        "strict": bool2str(strict),
        "verbose": bool2str(verbose),
        "offset": offset,
        "limit": limit,
    }
    tt = gbif_GET(url, args, **kwargs)
    return tt
def dataset_search(q=None, type=None, keyword=None, owningOrg=None,
                   publishingOrg=None, hostingOrg=None, decade=None,
                   publishingCountry=None, facet=None, facetMincount=None,
                   facetMultiselect=None, hl=False, limit=100, offset=None,
                   **kwargs):
    """
    Full text search across all datasets. Results are ordered by relevance.

    :param q: [str] Query term(s) for full text search.  The value for this
        parameter can be a simple word or a phrase. Wildcards can be added to
        the simple word parameters only, e.g. ``q=*puma*``
    :param type: [str] Type of dataset, options include OCCURRENCE, etc.
    :param keyword: [str] Keyword to search by. Datasets can be tagged by
        keywords, which you can search on. The search is done on the merged
        collection of tags, the dataset keywordCollections and
        temporalCoverages. SEEMS TO NOT BE WORKING ANYMORE AS OF 2016-09-02.
    :param owningOrg: [str] Owning organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param publishingOrg: [str] Publishing organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param hostingOrg: [str] Hosting organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param publishingCountry: [str] Publishing country.
    :param decade: [str] Decade, e.g., 1980. Filters datasets by their temporal
        coverage broken down to decades. Decades are given as a full year,
        e.g. 1880, 1960, 2000, etc, and will return datasets wholly contained
        in the decade as well as those that cover the entire decade or more.
        Facet by decade to get the break down, e.g.
        ``/search?facet=DECADE&facet_only=true`` (see example below)
    :param facet: [str] A list of facet names used to retrieve the 100 most
        frequent values for a field. Allowed facets are: type, keyword,
        publishingOrg, hostingOrg, decade, and publishingCountry.
        Additionally subtype and country are legal values but not yet
        implemented, so data will not yet be returned for them.
    :param facetMincount: [str] Used in combination with the facet parameter.
        Set facetMincount={#} to exclude facets with a count less than {#},
        e.g. http://api.gbif.org/v1/dataset/search?facet=type&limit=0&facetMincount=10000
        only shows the type value 'OCCURRENCE' because 'CHECKLIST' and
        'METADATA' have counts less than 10000.
    :param facetMultiselect: [bool] Used in combination with the facet
        parameter. Set ``facetMultiselect=True`` to still return counts for
        values that are not currently filtered, e.g.
        http://api.gbif.org/v1/dataset/search?facet=type&limit=0&type=CHECKLIST&facetMultiselect=true
        still shows type values 'OCCURRENCE' and 'METADATA' even though type
        is being filtered by ``type=CHECKLIST``
    :param hl: [bool] Set ``hl=True`` to highlight terms matching the query
        when in fulltext search fields. The highlight will be an emphasis tag
        of class 'gbifH1' e.g.
        http://api.gbif.org/v1/dataset/search?q=plant&hl=true
        Fulltext search fields include: title, keyword, country, publishing
        country, publishing organization title, hosting organization title,
        and description. One additional full text field is searched which
        includes information from metadata documents, but the text of this
        field is not returned in the response.
    :param limit: [int] Number of results to return. Default: ``100``
    :param offset: [int] Record to start at. Default: ``0``

    :note: Note that you can pass in additional faceting parameters on a per
        field basis. For example, if you want to limit the number of facets
        returned from a field ``foo`` to 3 results, pass in
        ``foo_facetLimit = 3``. GBIF does not allow all per field parameters,
        but does allow some. See also examples.

    :return: A dictionary

    References: http://www.gbif.org/developer/registry#datasetSearch

    Usage::

        from pygbif import registry
        # Gets all datasets of type "OCCURRENCE".
        registry.dataset_search(type="OCCURRENCE", limit = 10)

        # Fulltext search for all datasets having the word "amsterdam" somewhere in
        # its metadata (title, description, etc).
        registry.dataset_search(q="amsterdam", limit = 10)

        # Limited search
        registry.dataset_search(type="OCCURRENCE", limit=2)
        registry.dataset_search(type="OCCURRENCE", limit=2, offset=10)

        # Search by decade
        registry.dataset_search(decade=1980, limit = 10)

        # Faceting
        ## just facets
        registry.dataset_search(facet="decade", facetMincount=10, limit=0)
        ## data and facets
        registry.dataset_search(facet="decade", facetMincount=10, limit=2)
        ## many facet variables
        registry.dataset_search(facet=["decade", "type"], facetMincount=10, limit=0)
        ## facet vars
        ### per variable paging
        x = registry.dataset_search(
            facet = ["decade", "type"],
            decade_facetLimit = 3,
            type_facetLimit = 3,
            limit = 0
        )

        ## highlight
        x = registry.dataset_search(q="plant", hl=True, limit = 10)
        [ z['description'] for z in x['results'] ]
    """
    url = gbif_baseurl + "dataset/search"
    args = {
        "q": q,
        "type": type,
        "keyword": keyword,
        "owningOrg": owningOrg,
        "publishingOrg": publishingOrg,
        "hostingOrg": hostingOrg,
        "decade": decade,
        "publishingCountry": publishingCountry,
        "facet": facet,
        "facetMincount": facetMincount,
        "facetMultiselect": bool2str(facetMultiselect),
        "hl": bool2str(hl),
        "limit": limit,
        "offset": offset,
    }
    # Per-field faceting params arrive as e.g. ``decade_facetLimit``; the GBIF
    # API expects ``decade.facetLimit``, so translate underscores to dots.
    gbif_kwargs = {
        key: kwargs[key] for key in kwargs if key not in requests_argset
    }
    # A dict comprehension is never None — test for a non-empty dict instead.
    if gbif_kwargs:
        xx = dict(
            zip(
                [re.sub("_", ".", x) for x in gbif_kwargs.keys()],
                gbif_kwargs.values(),
            )
        )
        args.update(xx)
    # Only genuine requests options (timeout, proxies, ...) are forwarded on.
    kwargs = {key: kwargs[key] for key in kwargs if key in requests_argset}
    out = gbif_GET(url, args, **kwargs)
    return out
def dataset_suggest(q=None, type=None, keyword=None, owningOrg=None,
                    publishingOrg=None, hostingOrg=None,
                    publishingCountry=None, decade=None, limit=100,
                    offset=None, **kwargs):
    """
    Search that returns up to 20 matching datasets. Results are ordered by relevance.

    :param q: [str] Query term(s) for full text search.  The value for this
        parameter can be a simple word or a phrase. Wildcards can be added to
        the simple word parameters only, e.g. ``q=*puma*``
    :param type: [str] Type of dataset, options include OCCURRENCE, etc.
    :param keyword: [str] Keyword to search by. Datasets can be tagged by
        keywords, which you can search on. The search is done on the merged
        collection of tags, the dataset keywordCollections and
        temporalCoverages. SEEMS TO NOT BE WORKING ANYMORE AS OF 2016-09-02.
    :param owningOrg: [str] Owning organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param publishingOrg: [str] Publishing organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param hostingOrg: [str] Hosting organization. A uuid string. See
        :func:`~pygbif.registry.organizations`
    :param publishingCountry: [str] Publishing country.
    :param decade: [str] Decade, e.g., 1980. Filters datasets by their temporal
        coverage broken down to decades. Decades are given as a full year,
        e.g. 1880, 1960, 2000, etc, and will return datasets wholly contained
        in the decade as well as those that cover the entire decade or more.
        Facet by decade to get the break down, e.g.
        ``/search?facet=DECADE&facet_only=true`` (see example below)
    :param limit: [int] Number of results to return. Default: ``100``
    :param offset: [int] Record to start at. Default: ``0``

    :return: A dictionary

    References: http://www.gbif.org/developer/registry#datasetSearch

    Usage::

        from pygbif import registry
        registry.dataset_suggest(q="Amazon", type="OCCURRENCE")

        # Suggest datasets tagged with keyword "france".
        registry.dataset_suggest(keyword="france")

        # Suggest datasets owned by the organization with key
        # "07f617d0-c688-11d8-bf62-b8a03c50a862" (UK NBN).
        registry.dataset_suggest(owningOrg="07f617d0-c688-11d8-bf62-b8a03c50a862")

        # Fulltext search for all datasets having the word "amsterdam" somewhere in
        # its metadata (title, description, etc).
        registry.dataset_suggest(q="amsterdam")

        # Limited search
        registry.dataset_suggest(type="OCCURRENCE", limit=2)
        registry.dataset_suggest(type="OCCURRENCE", limit=2, offset=10)

        # Return just descriptions
        registry.dataset_suggest(type="OCCURRENCE", limit = 5, description=True)

        # Search by decade
        registry.dataset_suggest(decade=1980, limit = 30)
    """
    url = gbif_baseurl + "dataset/suggest"
    args = {
        "q": q,
        "type": type,
        "keyword": keyword,
        "publishingOrg": publishingOrg,
        "hostingOrg": hostingOrg,
        "owningOrg": owningOrg,
        "decade": decade,
        "publishingCountry": publishingCountry,
        "limit": limit,
        "offset": offset,
    }
    out = gbif_GET(url, args, **kwargs)
    return out
def getdata(x, **kwargs):
    """
    Fetch the metrics document for a single dataset.

    :param x: [str] A dataset key (uuid string), interpolated into the
        ``dataset/{key}/metrics`` route.
    :param kwargs: Further named arguments forwarded to :func:`gbif_GET`
        (e.g. requests options such as ``timeout``) — added for consistency
        with the other fetch helpers in this module; callers passing no
        kwargs see identical behavior.

    :return: The parsed response from the GBIF dataset metrics endpoint.
    """
    url = gbif_baseurl + "dataset/" + x + "/metrics"
    return gbif_GET(url, {}, **kwargs)