Example 1
def gbif_search(taxon_name):
  """
  API to GBIF database.
  :param taxon_name: Scientific name of tree species (e.g 'Fagus sylvatica')
  """
  
  try:
    from pygbif import occurrences as occ
    from pygbif import species


    polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
    "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
    "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",      
    "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",   
    "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
    "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",     
    "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",     
    "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",   
    "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", 
    "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",      
    "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",     
    "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]


    nm = species.name_backbone(taxon_name)['usageKey']
    logger.info('Taxon Key found: %s' % nm)
  except Exception as e:
    logger.error('Failed to resolve taxon key: %s' % e)
  
  try:
    results = []
    for i, p in enumerate(polys):
      res = []
      x = occ.search(taxonKey=nm, geometry=p)
      res.append(x['results'])
      while not x['endOfRecords']:
          x = occ.search(taxonKey=nm, geometry=p, offset=sum(len(r) for r in res))
          res.append(x['results'])
      results.append([w for z in res for w in z])
      logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
    logger.info('***** GBIF data fetching done! ***** ')    
  except Exception as e:
    logger.error('Coordinate fetching failed: %s' % (e))
   
  try:
    allres = [w for z in results for w in z]
    coords = [ { k: v for k, v in w.items() if k.startswith('decimal') } for w in allres ]

    from numpy import empty
    latlon = empty([len(coords),2], dtype=float, order='C')

    for i, coord in enumerate(coords):   
      latlon[i][0] = coord['decimalLatitude']  
      latlon[i][1] = coord['decimalLongitude']
     
    logger.info('read in PA coordinates for %s rows' % len(latlon[:, 0]))
  except Exception as e:
    logger.error('failed search GBIF data %s' % (e))
  return latlon
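
A note on the pagination idiom above: the offset passed to occ.search is the number of records fetched so far (the sum of the page lengths), and the loop stops when endOfRecords is True. A minimal self-contained sketch of the same idiom, assuming only that pygbif is installed and its default page size (300 records) applies:

from pygbif import occurrences as occ
from pygbif import species

def fetch_all_occurrences(taxon_name, geometry=None):
    """Page through occ.search until endOfRecords is True."""
    key = species.name_backbone(taxon_name)['usageKey']
    records = []
    end_of_records = False
    while not end_of_records:
        page = occ.search(taxonKey=key, geometry=geometry, offset=len(records))
        records.extend(page['results'])
        end_of_records = page['endOfRecords']
    return records
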
Example 2
    def get_gbif_results(self):
        first = occurrences.search(**self.gbif_query)
        results = first['results']
        for offset in range(300, min(first['count'], 90000), 300):
            args = {**self.gbif_query, **{'offset': offset}}
            results += occurrences.search(**args)['results']

        return {r['gbifID']: r for r in results}
Example 3
    def find_species_occurrences(self, **kwargs):
        """
        Finds and loads species occurrence data into pandas DataFrame.
        Data comes from the GBIF database, based on name or gbif ID
        the occurrences.search(...) returns a list of json structures
        which we load into Pandas DataFrame for easier manipulation.

        """

        try:
            species_result = species.name_backbone(name=self.name_species,
                                                   verbose=False)
            if species_result['matchType'] == 'NONE':
                raise ValueError("No match for the species %s " %
                                 self.name_species)
            self.ID = species_result['usageKey']
            first_res = occurrences.search(taxonKey=self.ID,
                                           limit=100000,
                                           **kwargs)

        except AttributeError:  # name not provided, assume at least ID is provided
            first_res = occurrences.search(taxonKey=self.ID,
                                           limit=100000,
                                           **kwargs)

        #TODO: more efficient way than copying...appending to the same dataframe?

        full_results = copy.copy(first_res)

        # results are paginated so we need a loop to fetch them all
        counter = 1
        while first_res['endOfRecords'] is False:
            first_res = occurrences.search(taxonKey=self.ID,
                                           offset=300 * counter,
                                           limit=10000)
            full_results['results'] = copy.copy(
                full_results['results']) + copy.copy(first_res['results'])
            counter += 1

        logger.info("Loading species ... ")
        logger.info("Number of occurrences: %s " % full_results['count'])
        logger.debug(full_results['count'] == len(
            full_results['results']))  # match?

        #TODO: do we want a special way of loading? say, suggesting data types in some columns?

        #TODO: should we reformat the dtypes of the columns? at least day/month/year we care?
        #data_cleaned[['day', 'month', 'year']] = data_cleaned[['day', 'month', 'year']].fillna(0.0).astype(int)

        self.data_full = pd.DataFrame(
            full_results['results'])  # load results in pandas dataframes
        if self.data_full.empty:
            logger.info("Could not retrieve any occurrences!")
        else:
            logger.info("Loaded species: %s " %
                        self.data_full['species'].unique())
        return self.data_full
Example 4
def get_gbif(taxon_name='Fagus sylvatica', bbox=[-10, -10, 10, 10]):
    """
    fetching species data from GBIF database ( pageing over polygons in Europe )

    :param taxon_name: Taxon name of the species to be searched
                     default='Fagus sylvatica'
    :param bbox: extention of georaphical region to fetch data e.g bbox=[-180,-90,180,90]
    :returns dic: Dictionay of species occurences
    """
    from numpy import arange  # nan, empty,
    from pygbif import occurrences as occ
    from pygbif import species

    logger.info('libs loaded in get_gbif function')

    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('taxon name set')
        print('taxon name set')
        # generate polygons with gridwidth 10_degree
        # x_len = (bbox[2] - bbox[0] ) / 10
        # y_len = (bbox[3] - bbox[1] ) / 10
        # logger.info('length = %s , %s ' % (x_len, y_len))
        polys = []
        gridlen = 10

        for x in arange(bbox[0], bbox[2], gridlen):
            for y in arange(bbox[1], bbox[3], gridlen):
                print('processing %s , %s' % (x, y))
                poly = "POLYGON ((%s %s,%s %s,%s %s,%s %s,%s %s))" % \
                    (x, y, x, y + gridlen, x + gridlen, y + gridlen, x + gridlen, y, x, y)
                polys.extend([poly])
        print(polys)

        logger.info('%s polygons created' % len(polys))
        gbifdic = []

        for i in polys:
            logger.info('processing polygon')
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm, geometry=i, offset=sum(len(r) for r in res))
                res.append(x['results'])
            gbifdic.append([w for z in res for w in z])
            logger.info('polygon fetched')

        results = [w for z in gbifdic for w in z]
    except Exception:
        msg = 'failed to search GBIF data.'
        logger.exception(msg)
        raise
    return results
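
A usage sketch for the function above; the bbox ordering [xmin, ymin, xmax, ymax] is an assumption read off the loop bounds, not documented behavior:

# Tile a rough European extent into 10-degree cells and fetch all records.
records = get_gbif(taxon_name='Fagus sylvatica', bbox=[-14, 35, 26, 65])
print('%s occurrence records fetched' % len(records))
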
Example 5
def gbif_search(taxon_name):
    from numpy import nan, empty
    from pygbif import occurrences as occ
    from pygbif import species

    try:
        nm = species.name_backbone(taxon_name)['usageKey']

        ## a set of WKT polygons
        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]

        results = []
        for i in polys:
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm,
                               geometry=i,
                               offset=sum(len(r) for r in res))
                res.append(x['results'])

            results.append([w for z in res for w in z])
            logger.info('polygon fetched')

        allres = [w for z in results for w in z]
        coords = [{k: v
                   for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]

        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            # default to 0 so records without coordinates are dropped by the filter below
            latlon[i][0] = coord.get('decimalLatitude', 0)
            latlon[i][1] = coord.get('decimalLongitude', 0)
        nz = (latlon == 0).sum(1)
        ll = latlon[nz == 0, :]
        logger.info('read in PA coordinates for %s rows ' % len(ll[:, 0]))
    except Exception as e:
        logger.exception('failed search GBIF data %s' % (e))
    return ll
Example 6
def get_occurrences(sp):
    '''
    Receive the specific code of a species and extract its occurrence counts by year (for Spain).

    Args:
        sp (int): the numeric taxon code of the species

    Returns:
        Dictionary mapping year to occurrence count
    '''

    years = range(1970, 2020)
    x = []

    for y in years:
        data = occ.search(taxonKey=sp, limit=300, country='ES', year=str(y))
        # Get the number of occurrences:
        x.append({str(y): data['count']})

    # Merge the per-year dicts into a single dictionary
    final = {}
    for d in x:
        for k in d.keys():
            final[k] = final.get(k, 0) + d[k]

    return final
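
If only the yearly counts are needed, pygbif's occurrences.count endpoint avoids downloading any records; a sketch assuming count accepts the same country/year filters that search does:

from pygbif import occurrences as occ

def get_yearly_counts(sp):
    # One count request per year; no occurrence records are fetched.
    return {str(y): occ.count(taxonKey=sp, country='ES', year=y)
            for y in range(1970, 2020)}
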
Example 7
def getOccurrences(plantsName):
    occurrences_output = {}
    index = [
        'family', 'phylum', 'order', 'genus', 'species', 'class', 'recordedBy',
        'decimalLatitude', 'decimalLongitude', 'eventDate', 'country',
        'stateProvince', 'locality'
    ]

    for plantName in plantsName:
        if config and config.l_plant:
            config.l_plant["text"] = plantName
        occurrences_plant = occurrences.search(
            scientificName=util.remove_author(plantName),
            continent='south_america')

        # print(json.dumps(occurrences_plant,indent=4))
        occurrences_list = []
        if 'count' in occurrences_plant:
            for result in occurrences_plant['results']:
                occurrence = {}
                for item in index:
                    if result and item in result:
                        occurrence[item] = result[item]
                    else:
                        occurrence[item] = ''

                occurrences_list.append(occurrence)

        occurrences_output[plantName] = occurrences_list

    return occurrences_output
Example 8
def GetGBIF(spp):
    
    '''
        NOTE: Not all species have all required fields in their set of records.
              To avoid an error when combining species data, create a dictionary
              with the required fields and no data, and make it into a dataframe.
              Then it is possible to append all the data for a given species to
              this empty dataframe, regardless of whether a field exists for that
              species. Subsequently, this dataframe can be thinned back to the
              required fields and reordered. Finally, it can be appended to a
              master dataframe.
    '''
    # Make an empty dataframe with required field names using an empty dictionary
    data0 = {'species':[],'vernacularName':[],
             'decimalLatitude':[],'decimalLongitude':[],
            'coordinateUncertaintyInMeters':[],'geodeticDatum':[],
            'eventRemarks':[],'locality':[],'locationRemarks':[],'occurrenceRemarks':[],
            'stateProvince':[],'year':[],'month':[],
            'basisOfRecord':[],'taxonRank':[],'taxonomicStatus':[]}
    df0 = pd.DataFrame(data=data0, index=None)
    
    print('Working on the following species:', spp)
    # Make an empty list to store data iterations
    tablelst = []
    n = 0
    eor = False
    # Because the pyGBIF occurrences module returns only 300 records at a time,
    # loop through all the records for a given species until the end of the
    # records is reached, i.e. endOfRecords is True
    while not eor:
        # Gather the occurrences dictionary using the appropriate criteria
        recs = occ.search(scientificName = spp, 
                           hasCoordinate=True,
                           country='US',
                           geoSpatialIssue=False,
                           offset=n) #geoSpatialIssue=False
        # Not all species have 'count' in their occurrence record dictionary.
        # If this one does, print the count; otherwise print UNKNOWN RECORD COUNT
        if 'count' in recs:
            cnt = recs['count']
            print('  This species has', cnt, 'records')
        else:
            #cnt = 0.9
            print('  This species has an UNKNOWN RECORD COUNT')
        eor = recs['endOfRecords']
        tablelst = tablelst + recs['results']
        n+=300
        
    # Make a dataframe out of the compiled lists
    df = pd.DataFrame(data=tablelst)
    # Append it to the empty dataframe (DataFrame.append was removed in
    # pandas 2.0, so use pd.concat instead)
    dfAppended = pd.concat([df0, df], ignore_index=True, sort=False)
    # Thin out the final dataframe to only the required fields
    # and make sure they are in the appropriate order
    dfThinned = dfAppended[['species','vernacularName','decimalLatitude','decimalLongitude',
        'coordinateUncertaintyInMeters','geodeticDatum',
        'eventRemarks','locality','locationRemarks','occurrenceRemarks',
        'stateProvince','year','month',
        'basisOfRecord','taxonRank','taxonomicStatus']]
    return dfThinned
Example 9
def test_search_occurrenceID():
    "occurrences.search - diff occurrenceID"
    uuid = "a55e740b-55af-4029-9481-74e0e5049581"
    res = occurrences.search(occurrenceID=uuid)
    assert 'dict' == res.__class__.__name__
    assert 6 == len(res)
    assert uuid == res['results'][0]['occurrenceID']
Example 10
def GetGBIF(spp):

    # Make an empty dataframe to accumulate the results
    df0 = pd.DataFrame()

    # Make an empty list to store data iterations
    tablelst = []
    n = 0
    eor = False
    # Because the pyGBIF occurrences module returns only 300 records at a time,
    # loop through all the records for a given species until the end of the
    # records is reached, i.e. endOfRecords is True
    while not eor:
        # Gather the occurrences dictionary using the appropriate criteria
        recs = occ.search(scientificName=spp,
                          hasCoordinate=True,
                          country='US',
                          geoSpatialIssue=False,
                          offset=n)  #geoSpatialIssue=False

        eor = recs['endOfRecords']
        tablelst = tablelst + recs['results']
        n += 300

    # Make a dataframe out of the compiled lists
    df = pd.DataFrame(data=tablelst)
    # Append it to the empty dataframe (DataFrame.append was removed in
    # pandas 2.0, so use pd.concat instead)
    dfAppended = pd.concat([df0, df], ignore_index=True, sort=False)
    return dfAppended
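
Since DataFrame.append was removed in pandas 2.0, the same accumulation is better written by collecting one DataFrame per page and concatenating once. A sketch under the same assumptions as above (module-level occ import; the geospatial-issue filter is omitted):

import pandas as pd
from pygbif import occurrences as occ

def get_gbif_frame(spp):
    # Collect one DataFrame per 300-record page, then concatenate once.
    pages, offset, eor = [], 0, False
    while not eor:
        recs = occ.search(scientificName=spp, hasCoordinate=True,
                          country='US', offset=offset)
        pages.append(pd.DataFrame(recs['results']))
        eor = recs['endOfRecords']
        offset += 300
    return pd.concat(pages, ignore_index=True, sort=False)
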
Example 11
def get_taxonomic_info(sp):
    '''
    Receive the specific code of a species and extract its taxonomic information.

    Args:
        sp (int): the numeric taxon code of the species

    Returns:
        Dictionary with the taxonomic information
    '''
    data = occ.search(taxonKey=sp, limit=300, country='ES', year='2016')
    taxonomic = data["results"]
    for dictionary in taxonomic:
        species_ = dictionary["scientificName"]
        kingdom = dictionary["kingdom"]
        genus = dictionary['genus']
        family = dictionary['family']
        country = dictionary["country"]
        records = dictionary['basisOfRecord']
        pub = dictionary['publishingCountry']

    species_info = {
        'species': species_,
        'kingdom': kingdom,
        'Genus': genus,
        'Family': family,
        'country': country,
        'records': records,
        'Publishing_country': pub
    }
    return species_info
Example 12
def gbif_search(taxon_name):
  from numpy import nan, empty
  from pygbif import occurrences as occ
  from pygbif import species
  
  try:
    nm = species.name_backbone(taxon_name)['usageKey']

    ## a set of WKT polygons
    polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
    "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
    "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",      
    "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",   
    "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
    "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",     
    "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",     
    "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",   
    "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", 
    "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",      
    "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",     
    "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]

    results = []
    for i in polys:
        res = []
        x = occ.search(taxonKey=nm, geometry=i)
        res.append(x['results'])
        while not x['endOfRecords']:
            x = occ.search(taxonKey=nm, geometry=i, offset=sum(len(r) for r in res))
            res.append(x['results'])

        results.append([w for z in res for w in z])
        logger.info('polygon fetched')

    allres = [w for z in results for w in z]
    coords = [ { k: v for k, v in w.items() if k.startswith('decimal') } for w in allres ]

    latlon = empty([len(coords),2], dtype=float, order='C')
    for i, coord in enumerate(coords):
      # default to 0 so records without coordinates are dropped by the filter below
      latlon[i][0] = coord.get('decimalLatitude', 0)
      latlon[i][1] = coord.get('decimalLongitude', 0)
    nz = (latlon == 0).sum(1)
    ll = latlon[nz == 0, :]
    logger.info('read in PA coordinates for %s rows ' % len(ll[:,0])) 
  except Exception as e: 
    logger.exception('failed search GBIF data %s' % (e))
  return ll             
Example 13
    def has_occurrences(self, taxid, geometry=None, country=None):
        occ = occurrences.search(
            taxonKey=taxid, geometry=geometry, country=country, limit=1
        )
        self.logger.debug(
            "Asked for {} occurrence(s), got {}".format(1, len(occ["results"]))
        )
        return len(occ["results"]) > 0
Example 14
    def find_species_occurrences(self, **kwargs):
        """
        Finds and loads species occurrence data into pandas DataFrame.
        Data comes from the GBIF database, based on name or gbif ID
        the occurrences.search(...) returns a list of json structures
        which we load into Pandas DataFrame for easier manipulation.

        """

        try:
            species_result = species.name_backbone(name=self.name_species, verbose=False)
            if species_result['matchType']=='NONE':
                raise ValueError("No match for the species %s " % self.name_species)
            self.ID = species_result['usageKey']
            first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)

        except AttributeError: # name not provided, assume at least ID is provided
            first_res = occurrences.search(taxonKey=self.ID, limit=100000, **kwargs)
        
        #TODO: more efficient way than copying...appending to the same dataframe?

        full_results = copy.copy(first_res)

        # results are paginated so we need a loop to fetch them all
        counter = 1
        while first_res['endOfRecords'] is False:
            first_res = occurrences.search(taxonKey=self.ID, offset=300*counter, limit=10000)
            full_results['results'] = copy.copy(full_results['results']) + copy.copy(first_res['results'])
            counter+=1
        
        logger.info("Loading species ... ")
        logger.info("Number of occurrences: %s " % full_results['count'])
        logger.debug(full_results['count'] == len(full_results['results'])) # match?

        #TODO: do we want a special way of loading? say, suggesting data types in some columns?

        #TODO: should we reformat the dtypes of the columns? at least day/month/year we care?
        #data_cleaned[['day', 'month', 'year']] = data_cleaned[['day', 'month', 'year']].fillna(0.0).astype(int)
        
        self.data_full = pd.DataFrame(full_results['results']) # load results in pandas dataframes
        if self.data_full.empty:
            logger.info("Could not retrieve any occurrences!")
        else:   
            logger.info("Loaded species: %s " % self.data_full['species'].unique())
        return self.data_full
Example 15
    def _get_gbif_occs(self):
        # get the gbif key for our species
        self.occfile = os.path.join(
            self.outputs_dir,
            self.profile['spname'].replace(" ", "_") + ".csv")
        if not self.key:
            self.key = species.name_backbone(name=self.profile['spname'],
                                             rank='species')['usageKey']

        # make lists to fill
        self.lats = []
        self.lons = []

        # cycle through observations, filling lists of lat and lon
        curr_offset = 0
        end_records = False
        while not end_records:
            occ_records = occ.search(taxonKey=self.key,
                                     hasCoordinate=True,
                                     decimalLatitude=','.join([
                                         str(self.profile['ymin']),
                                         str(self.profile['ymax'])
                                     ]),
                                     decimalLongitude=','.join([
                                         str(self.profile['xmin']),
                                         str(self.profile['xmax'])
                                     ]),
                                     offset=curr_offset)
            end_records = occ_records['endOfRecords']
            curr_offset += occ_records['limit']

            self.lons.extend(
                [i['decimalLongitude'] for i in occ_records['results']])
            self.lats.extend(
                [i['decimalLatitude'] for i in occ_records['results']])

        # prepare array to write to csv: species name, longitude, and latitude
        # columns, with a newline appended to each latitude to end the row
        csvarr = np.vstack([
            np.repeat(self.profile['spname'].replace(" ", "_"),
                      len(self.lons)), self.lons,
            [
                "{}{}".format(a_, b_)
                for a_, b_ in zip(self.lats, np.repeat('\n', len(self.lats)))
            ]
        ]).T
        # write occurrence data to csv
        with open(self.occfile, 'w') as f:
            f.write('Species,Longitude,Latitude\n')
            for line in csvarr:
                f.write(",".join(line))

        # make these easier to work with downstream
        self.lons = np.array(self.lons)
        self.lats = np.array(self.lats)
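
The newline bookkeeping in csvarr above is easy to get wrong; a sketch of the same three-column CSV written with the standard csv module, where path, spname, lons, and lats are hypothetical stand-ins for the attributes used above:

import csv

def write_occurrence_csv(path, spname, lons, lats):
    # One row per occurrence: Species,Longitude,Latitude
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Species', 'Longitude', 'Latitude'])
        for lon, lat in zip(lons, lats):
            writer.writerow([spname.replace(' ', '_'), lon, lat])
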
Example 16
def searching(name, myDate):
    now = datetime.datetime.now().strftime(
        "%Y-%m-%d")  # get the current date

    x = occ.search(q=name, eventDate=(myDate + ',' +
                                      now))  # search for the phrase from the given date until now

    dane = (x['results'])  # store the returned data
    limit = len(dane)

    wektor = list()  # list holding the records

    for i in range(limit):
        # copy the needed fields from the data; missing fields default to None
        country = dane[i].get('country')
        scientificName = dane[i].get('scientificName')
        eventDate = dane[i].get('eventDate')
        recordedBy = dane[i].get('recordedBy')
        dataKey = dane[i]['key']
        wektor.append((dataKey, country, scientificName, eventDate,
                       recordedBy))  # combine the fields into a single tuple

    #wektor.sort(key=takeDate,reverse=True)  # sort the records by date, earliest first
    wektor_koncowy = list()
    for i in range(len(wektor)):
        wektor_koncowy.append((i + 1, wektor[i][0], wektor[i][1], wektor[i][2],
                               wektor[i][3], wektor[i][4]))
    return wektor_koncowy
Example 17
def get_coordinates(sp):
    '''
    Extracts the taxonomic information and the coordinates for a species.

    Args:
        sp: taxon key of the species to download from the database
    Returns:
        DataFrame with 11 columns: long, lat, locality, year, month, kingdom, class, family, genus,
        species, common_name
    '''

    long, lat, month, year_list, kingdom, class_, family, genus, species_, verna, locality  = [], [], [], [], [], [], [], [], [], [], []

    years = range(2000, 2021)

    for year in years:
        data = occ.search(taxonKey=sp, country='ES', year=str(year))

        for i in data["results"]:
            month.append(i.get("month"))
            long.append(i.get("decimalLongitude"))
            lat.append(i.get("decimalLatitude"))
            year_list.append(i.get("year"))
            species_.append(i.get("scientificName"))
            kingdom.append(i.get("kingdom"))
            genus.append(i.get("genus"))
            family.append(i.get("family"))
            class_.append(i.get("class"))
            verna.append(i.get("vernacularName"))
            locality.append(i.get("locality"))

    df = pd.DataFrame(list(
        zip(long, lat, locality, year_list, month, kingdom, class_, family,
            genus, species_, verna)),
                      columns=[
                          'long', 'lat', "locality", "year", "month",
                          "kingdom", "class", "family", "genus", "species",
                          "common_name"
                      ])

    return df
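
The eleven parallel lists above can be replaced with one dict per record, since pandas builds a DataFrame directly from a list of dicts; a sketch under the same assumptions (taxon key sp, Spanish records, years 2000-2020):

import pandas as pd
from pygbif import occurrences as occ

def get_coordinates_rows(sp):
    # One dict per record; fields missing from a record simply become NaN.
    fields = ['decimalLongitude', 'decimalLatitude', 'locality', 'year',
              'month', 'kingdom', 'class', 'family', 'genus',
              'scientificName', 'vernacularName']
    rows = []
    for year in range(2000, 2021):
        data = occ.search(taxonKey=sp, country='ES', year=str(year))
        rows.extend({f: r.get(f) for f in fields} for r in data['results'])
    return pd.DataFrame(rows, columns=fields)
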
Example 18
def test_search_():
    "occurrences.search - diff taxonKey2"
    res = occurrences.search(taxonKey = 2683264)
    assert 'dict' == res.__class__.__name__
    assert 6 == len(res)
    assert 2683264 == res['results'][0]['taxonKey']
Example 19
from pygbif import species, occurrences
from numpy import nan, empty
TName = "Fagus sylvatica"
key = species.name_backbone(name=TName, rank="species")["usageKey"]
n = occurrences.count(taxonKey=key, isGeoreferenced=True)

# the GBIF search API caps a single search at 200,000 records
max_records = min(n, 200000)
results = occurrences.search(taxonKey=key, limit=max_records)

print('(', key, ')', '-', format(n, ','), 'occurrence(s)')

# lonslats = []
latlon = empty([max_records, 2], dtype=float, order='C')

for i, x in enumerate(results["results"]):
    #try:
    #if x['continent'].find('_') != -1:
    #Continent = ' '.join(x['continent'].split('_')).title()
    #else:
    #Continent = x['continent'].capitalize()
    #except:
    #Continent = ""
    #try:
    #Country = x['country']
    #except:
    #Country = ""

    #try:
Example 20
def search_species(entry):
    # To be used when calling the method

    name_input = entry

    # User input for testing purposes
    # name_input = input("Enter a common species name: ")

    results = {}

    # Uses the pygbif method to find results
    suggest = species.name_suggest(q=name_input, rank='SPECIES', limit=25)

    # Pulls the results from the dataset in json format
    suggest_data = suggest['data']['results']

    # print(suggest_data)

    data = suggest_data

    # Reads data for each result
    for o in data:
        # Finds the gbif key
        key = o['key']
        # Uses pygbif to find number of occurrences of species key
        occurs = occurrences.count(taxonKey=key)
        # Searches occurrence data for the species key
        occur_search = occurrences.search(taxonKey=key)
        # print('occur search: ' + str(occur_search))

        # for country in countries:
        #     print(country)
        # print(occur_search)

        # Runs if species has occurred more than zero times
        if occurs > 0:
            try:
                # Tries to retrieve scientific name
                canon_name = o['canonicalName']
            except KeyError:
                continue
            # Vernacular name init
            vern_name = ''
            # Variable for list of vernacular names
            names = o['vernacularNames']
            # Summary init
            summary = ''
            try:
                # If match found
                print('Scientific name: ' + canon_name)
                print('Vernacular names: ')
                match_found = False
                # Reads from results in matched name
                for name in names:
                    # Variable for vernacular name
                    vern_name = name['vernacularName']
                    # Used if all languages want to be included
                    # print(name['vernacularName'])
                    language = (name['language'])
                    # Can be changed if user wants to select a specific language
                    if language == 'eng':
                        # Checks if vernacular name is matched with search input
                        if name_input in vern_name and match_found is False:
                            match_found = True
                            print(vern_name)
                # If no exact match is found it reads the first vernacular name
                if match_found is False:
                    name_store = []
                    for name in names:
                        vern_name = name['vernacularName']
                        language = (name['language'])
                        if vern_name not in name_store:
                            name_store.append(vern_name)
                            if language == 'eng':
                                print(vern_name)
                # Adds scientific and vernacular name to results dictionary
                results.setdefault(canon_name, []).append(vern_name)

                # print(wikipedia.search(canon_name))
                try:
                    # pulls the wiki page based on canonical name in url (usually works)
                    # desc = wikipedia.page(canon_name, auto_suggest=True)
                    # alternative wiki page including all sections
                    # page = wikipedia.WikipediaPage(canon_name)
                    # pulls the summary page for the species
                    summary = wikipedia.summary(canon_name, sentences=2)
                    # experimental for section pages
                    # sections = page.sections
                    # for section in sections:
                    #     print(section)
                    print(summary)
                    results.setdefault(canon_name, []).append(summary)
                    # print(desc.content)
                except Exception:
                    print("No description found")

                print('GBIF Key: ' + str(key))
                print('GBIF Species Page: ' + 'http://www.gbif.org/species/' + str(key))
                print('Count: ' + str(occurs))

                # This reads results for occurences and finds countries the species
                # was observed in and how many occurrences there
                occur_data = occur_search['results']
                countries = {}
                for occur in occur_data:
                    try:
                        country = occur['country']
                        if country not in countries:
                            countries[country] = 1
                        else:
                            countries[country] += 1
                            # print(occur['country'])
                    except KeyError:
                        continue
                # for country in countries:
                #     print(country)
                sorted_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)
                if countries != {}:
                    print('Top 3 Countries Observed: ')
                    for country in sorted_countries[:3]:
                        print(str(country))
                print('\n')

                # return canon_name, vern_name, summary
            except:
                continue

    print(results)
    return results
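
The per-country tally near the end of the function is a textbook case for collections.Counter; a minimal sketch over the same occur_search['results'] structure:

from collections import Counter

def top_countries(occ_results, n=3):
    # Count the 'country' field across records, skipping records without one.
    counts = Counter(r['country'] for r in occ_results if 'country' in r)
    return counts.most_common(n)
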
Example 21
def test_search_key1():
    "occurrences.search - diff taxonKey"
    res = occurrences.search(taxonKey=2431762)
    assert "dict" == res.__class__.__name__
    assert 6 == len(res)
    assert 2431762 == res["results"][0]["taxonKey"]
Example 22
sql_twi = """ SELECT coordinate FROM gbif_requests
              WHERE request_id = '{0}'""".format(config.gbif_req_id)
coordinate = cursor2.execute(sql_twi).fetchone()[0]

sql_twi = """ SELECT continent FROM gbif_requests
              WHERE request_id = '{0}'""".format(config.gbif_req_id)
continent = cursor2.execute(sql_twi).fetchone()[0]
if continent == "None":
    continent = None

#################### REQUEST RECORDS ACCORDING TO REQUEST PARAMS
# First, find out how many records there are that meet criteria
occ_search = occurrences.search(gbif_id,
                                year=years,
                                month=months,
                                decimalLatitude=latRange,
                                decimalLongitude=lonRange,
                                hasGeospatialIssue=geoIssue,
                                hasCoordinate=coordinate,
                                continent=continent)
occ_count = occ_search['count']
print('\n{0} records exist with the request parameters'.format(occ_count))

# Get occurrences in batches, saving into master list
alloccs = []
batches = range(0, occ_count, 300)
for i in batches:
    occ_json = occurrences.search(gbif_id,
                                  limit=300,
                                  offset=i,
                                  year=years,
                                  month=months,
                                  decimalLatitude=latRange,
                                  decimalLongitude=lonRange,
                                  hasGeospatialIssue=geoIssue,
                                  hasCoordinate=coordinate,
                                  continent=continent)
    # save this batch into the master list
    alloccs.extend(occ_json['results'])
Example 23
def test_search_():
    "occurrences.search - diff taxonKey2"
    res = occurrences.search(taxonKey = 1052909293)
    assert 'dict' == res.__class__.__name__
    assert 5 == len(res)
Example 24
def main():
    from dateutil.parser import parse

    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(
            _("Cannot import pygbif (https://github.com/sckott/pygbif)"
              " library."
              " Please install it (pip install pygbif)"
              " or ensure that it is on path"
              " (use PYTHONPATH variable)."))

    # Parse input options
    output = options["output"]
    mask = options["mask"]
    species_maps = flags["i"]
    no_region_limit = flags["r"]
    no_topo = flags["b"]
    print_species = flags["p"]
    print_species_table = flags["t"]
    print_species_shell = flags["g"]
    print_occ_number = flags["o"]
    allow_no_geom = flags["n"]
    hasGeoIssue = flags["s"]
    taxa_list = options["taxa"].split(",")
    institutionCode = options["institutioncode"]
    basisofrecord = options["basisofrecord"]
    recordedby = options["recordedby"].split(",")
    date_from = options["date_from"]
    date_to = options["date_to"]
    country = options["country"]
    continent = options["continent"]
    rank = options["rank"]

    # Define static variable
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj string
    latlon_crs = [
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000 +type=crs",
    ]
    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    dwc_keys = [
        "key",
        "taxonRank",
        "taxonKey",
        "taxonID",
        "scientificName",
        "species",
        "speciesKey",
        "genericName",
        "genus",
        "genusKey",
        "family",
        "familyKey",
        "order",
        "orderKey",
        "class",
        "classKey",
        "phylum",
        "phylumKey",
        "kingdom",
        "kingdomKey",
        "eventDate",
        "verbatimEventDate",
        "startDayOfYear",
        "endDayOfYear",
        "year",
        "month",
        "day",
        "occurrenceID",
        "occurrenceStatus",
        "occurrenceRemarks",
        "Habitat",
        "basisOfRecord",
        "preparations",
        "sex",
        "type",
        "locality",
        "verbatimLocality",
        "decimalLongitude",
        "decimalLatitude",
        "coordinateUncertaintyInMeters",
        "geodeticDatum",
        "higerGeography",
        "continent",
        "country",
        "countryCode",
        "stateProvince",
        "gbifID",
        "protocol",
        "identifier",
        "recordedBy",
        "identificationID",
        "identifiers",
        "dateIdentified",
        "modified",
        "institutionCode",
        "lastInterpreted",
        "lastParsed",
        "references",
        "relations",
        "catalogNumber",
        "occurrenceDetails",
        "datasetKey",
        "datasetName",
        "collectionCode",
        "rights",
        "rightsHolder",
        "license",
        "publishingOrgKey",
        "publishingCountry",
        "lastCrawled",
        "specificEpithet",
        "facts",
        "issues",
        "extensions",
        "language",
    ]
    # Define columns for the attribute table
    cols = [
        ("cat", "INTEGER PRIMARY KEY"),
        ("g_search", "varchar(100)"),
        ("g_key", "integer"),
        ("g_taxonrank", "varchar(50)"),
        ("g_taxonkey", "integer"),
        ("g_taxonid", "varchar(50)"),
        ("g_scientificname", "varchar(255)"),
        ("g_species", "varchar(255)"),
        ("g_specieskey", "integer"),
        ("g_genericname", "varchar(255)"),
        ("g_genus", "varchar(50)"),
        ("g_genuskey", "integer"),
        ("g_family", "varchar(50)"),
        ("g_familykey", "integer"),
        ("g_order", "varchar(50)"),
        ("g_orderkey", "integer"),
        ("g_class", "varchar(50)"),
        ("g_classkey", "integer"),
        ("g_phylum", "varchar(50)"),
        ("g_phylumkey", "integer"),
        ("g_kingdom", "varchar(50)"),
        ("g_kingdomkey", "integer"),
        ("g_eventdate", "text"),
        ("g_verbatimeventdate", "varchar(50)"),
        ("g_startDayOfYear", "integer"),
        ("g_endDayOfYear", "integer"),
        ("g_year", "integer"),
        ("g_month", "integer"),
        ("g_day", "integer"),
        ("g_occurrenceid", "varchar(255)"),
        ("g_occurrenceStatus", "varchar(50)"),
        ("g_occurrenceRemarks", "varchar(50)"),
        ("g_Habitat", "varchar(50)"),
        ("g_basisofrecord", "varchar(50)"),
        ("g_preparations", "varchar(50)"),
        ("g_sex", "varchar(50)"),
        ("g_type", "varchar(50)"),
        ("g_locality", "varchar(255)"),
        ("g_verbatimlocality", "varchar(255)"),
        ("g_decimallongitude", "double precision"),
        ("g_decimallatitude", "double precision"),
        ("g_coordinateUncertaintyInMeters", "double precision"),
        ("g_geodeticdatum", "varchar(50)"),
        ("g_higerGeography", "varchar(255)"),
        ("g_continent", "varchar(50)"),
        ("g_country", "varchar(50)"),
        ("g_countryCode", "varchar(50)"),
        ("g_stateProvince", "varchar(50)"),
        ("g_gbifid", "varchar(255)"),
        ("g_protocol", "varchar(255)"),
        ("g_identifier", "varchar(50)"),
        ("g_recordedby", "varchar(255)"),
        ("g_identificationid", "varchar(255)"),
        ("g_identifiers", "text"),
        ("g_dateidentified", "text"),
        ("g_modified", "text"),
        ("g_institutioncode", "varchar(50)"),
        ("g_lastinterpreted", "text"),
        ("g_lastparsed", "text"),
        ("g_references", "varchar(255)"),
        ("g_relations", "text"),
        ("g_catalognumber", "varchar(50)"),
        ("g_occurrencedetails", "text"),
        ("g_datasetkey", "varchar(50)"),
        ("g_datasetname", "varchar(255)"),
        ("g_collectioncode", "varchar(50)"),
        ("g_rights", "varchar(255)"),
        ("g_rightsholder", "varchar(255)"),
        ("g_license", "varchar(50)"),
        ("g_publishingorgkey", "varchar(50)"),
        ("g_publishingcountry", "varchar(50)"),
        ("g_lastcrawled", "text"),
        ("g_specificepithet", "varchar(50)"),
        ("g_facts", "text"),
        ("g_issues", "text"),
        ("g_extensions", "text"),
        ("g_language", "varchar(50)"),
    ]

    # maybe no longer required in Python3
    set_output_encoding()
    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

        if date_from and not date_to:
            eventDate = "{}".format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = "{},{}".format(date_from, date_to)
        else:
            grass.fatal(
                "Invalid date range: End date has to be after start date!")
    # Set filter on basisOfRecord if requested by user
    if basisofrecord == "ALL":
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    proj_info = grass.parse_command("g.proj", flags="g")
    target_crs = grass.read_command("g.proj", flags="fj").rstrip()
    target = osr.SpatialReference()

    # Prefer EPSG CRS definitions
    if proj_info["epsg"]:
        target.ImportFromEPSG(int(proj_info["epsg"]))
    else:
        target.ImportFromProj4(target_crs)

    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        target.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs == "XY location (unprojected)":
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        source.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split("@")) == 2:
            m = VectorTopo(mask.split("@")[0], mapset=mask.split("@")[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal("Could not find vector map <{}>".format(mask))
        m.open("r")
        if not m.is_open():
            grass.fatal("Could not open vector map <{}>".format(mask))

        # Use the single area's geometry as the spatial filter if the map
        # contains exactly one area; otherwise fall back to the map's Bbox
        if m.number_of("areas") == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = (str(m.bbox()).replace("Bbox(", "").replace(
                " ", "").rstrip(")").split(","))
            region_pol = "POLYGON (({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))".format(
                bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal("Import of data from outside the current region is"
                            "only supported in a WGS84 location!")
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid projection errors
            region = grass.parse_command("g.region", flags="g")
            region_pol = "POLYGON (({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format(
                region["e"], region["n"], region["w"], region["s"])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        mapname = output
        new = Vector(mapname)
        new.open("w", tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if the taxon key is not provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s,
                                                      rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match["usageKey"]
            except:
                grass.error(
                    "Data request for taxon {} failed. Are you online?".format(
                        s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print("Matching taxon for {} is:".format(s))
            print("{} {}".format(species_match["scientificName"],
                                 species_match["status"]))
            if "alternatives" in list(species_match.keys()):
                print("Alternative matches might be: {}".format(s))
                for m in species_match["alternatives"]:
                    print("{} {}".format(m["scientificName"], m["status"]))
            else:
                print("No alternatives found for the given taxon")
            continue
        if print_species_shell:
            print("match={}".format(species_match["scientificName"]))
            if "alternatives" in list(species_match.keys()):
                alternatives = []
                for m in species_match["alternatives"]:
                    alternatives.append(m["scientificName"])
                print("alternatives={}".format(",".join(alternatives)))
            continue
        if print_species_table:
            if "alternatives" in list(species_match.keys()):
                if len(species_match["alternatives"]) == 0:
                    print("{0}|{1}|{2}|".format(
                        s, key, species_match["scientificName"]))
                else:
                    alternatives = []
                    for m in species_match["alternatives"]:
                        alternatives.append(m["scientificName"])
                    print("{0}|{1}|{2}|{3}".format(
                        s,
                        key,
                        species_match["scientificName"],
                        ",".join(alternatives),
                    ))
            continue
        try:
            returns_n = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=1,
            )["count"]
        except:
            grass.error(
                "Data request for taxon {} faild. Are you online?".format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            print("Found {0} occurrences for taxon {1}...".format(
                returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning(
                "No occurrences for current search for taxon {0}...".format(s))
            continue
        elif returns_n >= 200000:
            grass.warning(
                "Your search for {1} returns {0} records.\n"
                "Unfortunately, the GBIF search API is limited to 200,000 records per request.\n"
                "The download will be incomplete. Please consider to split up your search."
                .format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose("Downloading {0} occurrences for taxon {1}...".format(
            returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = "{}_{}".format(s.replace(" ", "_"), output)

            new = Vector(mapname)
            new.open("w", tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=chunk_size,
                offset=offset,
            )

            # Write the returned data to map and attribute table
            for res in returns["results"]:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
                        res["decimalLongitude"], res["decimalLatitude"]))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res["decimalLongitude"]
                    y = res["decimalLatitude"]

                point = Point(x, y)

                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(
                    point,
                    cat=cat,
                    attrs=(
                        "{}".format(s),
                        res["key"],
                        res["taxonRank"],
                        res["taxonKey"],
                        res["taxonID"],
                        res["scientificName"],
                        res["species"],
                        res["speciesKey"],
                        res["genericName"],
                        res["genus"],
                        res["genusKey"],
                        res["family"],
                        res["familyKey"],
                        res["order"],
                        res["orderKey"],
                        res["class"],
                        res["classKey"],
                        res["phylum"],
                        res["phylumKey"],
                        res["kingdom"],
                        res["kingdomKey"],
                        "{}".format(res["eventDate"])
                        if res["eventDate"] else None,
                        "{}".format(res["verbatimEventDate"])
                        if res["verbatimEventDate"] else None,
                        res["startDayOfYear"],
                        res["endDayOfYear"],
                        res["year"],
                        res["month"],
                        res["day"],
                        res["occurrenceID"],
                        res["occurrenceStatus"],
                        res["occurrenceRemarks"],
                        res["Habitat"],
                        res["basisOfRecord"],
                        res["preparations"],
                        res["sex"],
                        res["type"],
                        res["locality"],
                        res["verbatimLocality"],
                        res["decimalLongitude"],
                        res["decimalLatitude"],
                        res["coordinateUncertaintyInMeters"],
                        res["geodeticDatum"],
                        res["higerGeography"],
                        res["continent"],
                        res["country"],
                        res["countryCode"],
                        res["stateProvince"],
                        res["gbifID"],
                        res["protocol"],
                        res["identifier"],
                        res["recordedBy"],
                        res["identificationID"],
                        ",".join(res["identifiers"]),
                        "{}".format(res["dateIdentified"])
                        if res["dateIdentified"] else None,
                        "{}".format(res["modified"])
                        if res["modified"] else None,
                        res["institutionCode"],
                        "{}".format(res["lastInterpreted"])
                        if res["lastInterpreted"] else None,
                        "{}".format(res["lastParsed"])
                        if res["lastParsed"] else None,
                        res["references"],
                        ",".join(res["relations"]),
                        res["catalogNumber"],
                        "{}".format(res["occurrenceDetails"])
                        if res["occurrenceDetails"] else None,
                        res["datasetKey"],
                        res["datasetName"],
                        res["collectionCode"],
                        res["rights"],
                        res["rightsHolder"],
                        res["license"],
                        res["publishingOrgKey"],
                        res["publishingCountry"],
                        "{}".format(res["lastCrawled"])
                        if res["lastCrawled"] else None,
                        res["specificEpithet"],
                        ",".join(res["facts"]),
                        ",".join(res["issues"]),
                        ",".join(res["extensions"]),
                        res["language"],
                    ),
                )

                cat = cat + 1

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command("v.build", map=mapname, option="build")

            # Write history to map
            grass.vector_history(mapname)

    # Close the output map if not a map for each species is requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command("v.build", map=mapname, option="build")

        # Write history to map
        grass.vector_history(mapname)
Example no. 25
def test_search_():
    "occurrences.search - diff taxonKey2"
    res = occurrences.search(taxonKey=2683264)
    assert 'dict' == res.__class__.__name__
    assert 6 == len(res)
    assert 2683264 == res['results'][0]['taxonKey']
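For context, a search response carries six top-level keys (count, endOfRecords, facets, limit, offset, results), which is what the length assertion above checks. A minimal sketch to inspect them, assuming pygbif is installed and the GBIF API is reachable:

# Minimal sketch: inspect the top-level structure of a search response.
from pygbif import occurrences

res = occurrences.search(taxonKey=2683264, limit=1)
print(sorted(res.keys()))  # expected: count, endOfRecords, facets, limit, offset, results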
Example no. 26
    def get_gbif_occs(self, geometry=False, tol=0):
        """
        Query the gbif database for occurrence data.
        """

        # Create a file to store occurrence data.
        self.occfile = os.path.join(
            self.outdir, self.params['spname'].replace(" ", "_") + ".csv")

        # Get the usageKey for species of interest.
        self.key = species.name_backbone(name=self.params['spname'],
                                         rank='species')['usageKey']

        # Create latitude/longitude lists.
        self.lats = []
        self.lons = []

        # Build dicts for optional params.
        # if self.params['basis'] == True:
        basis_params = dict(basisOfRecord=[
            'HUMAN_OBSERVATION', 'LIVING_SPECIMEN', 'FOSSIL_SPECIMEN'
        ], )
        # if self.params['continent'] is not None:
        continent_params = dict(continent=self.params['continent'])
        if geometry == True:
            geo_orient = shapely.geometry.polygon.orient(
                self.geometry['geometry'][0],
                1.0)  # Counter-clockwise for GBIF.
            geometry_bounds = dict(geometry=str(geo_orient.simplify(tol)))
        else:
            geometry_bounds = {}  # unused; filtered out below when geometry is False
        search_bounds = dict(
            decimalLatitude=','.join(
                [str(self.params['ymin']),
                 str(self.params['ymax'])]),
            decimalLongitude=','.join(
                [str(self.params['xmin']),
                 str(self.params['xmax'])]),
        )

        # Run a while-loop to go through all observations.  By default, tries to narrow to native range.
        # Don't pass lat/long bounds if none were entered.
        curr_offset = 0
        end_records = False
        while not end_records:
            occ_records = occ.search(
                taxonKey=self.key,
                hasCoordinate=True,
                # hasGeospatialIssue = False,
                **{
                    k: v
                    for k, v in basis_params.items()
                    if self.params['basis'] == True
                },
                **{
                    k: v
                    for k, v in continent_params.items()
                    if self.params['continent'] is not None
                },
                **{
                    k: v
                    for k, v in geometry_bounds.items() if geometry == True
                },
                **{k: v
                   for k, v in search_bounds.items() if 'None' not in v},
                offset=curr_offset)
            end_records = occ_records['endOfRecords']
            curr_offset += occ_records['limit']

            # Add latitude/longitude results to lists.
            self.lats.extend(
                [i['decimalLatitude'] for i in occ_records['results']])
            self.lons.extend(
                [i['decimalLongitude'] for i in occ_records['results']])

            # Print a dot on each cycle to show progress.
            print(".", end="")

            # When end of data is reached: build pandas dataframe from lists and remove duplicate data points.
            if occ_records['endOfRecords']:
                df = pd.DataFrame({
                    'Latitude': self.lats,
                    'Longitude': self.lons
                })
                df = df.drop_duplicates().reset_index()
                df = df.drop('index', axis=1)

                # Filter outliers.
                df = df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]

                # Reform the lists by subsetting the dataframe.
                self.lats = list(df['Latitude'])
                self.lons = list(df['Longitude'])

                # Print final number of records.
                print(f' Found {len(self.lats)} records.')

        # Build array to write to CSV file.  np.vstack layers arrays vertically, where each layer is species-lat-lon.
        # np.repeat copies the species names as many times as there are entries.  It also combines with zip() to put
        # a newline char at the end of each layer.
        csvarr = np.vstack([
            np.repeat(self.params['spname'].replace(" ", "_"), len(self.lats)),
            self.lats,
            [
                "{}{}".format(a_, b_)
                for a_, b_ in zip(self.lons, np.repeat('\n', len(self.lats)))
            ]
        ]).T

        # Write array to CSV file.
        with open(self.occfile, 'w') as f:
            f.write('Species,Latitude,Longitude\n')
            for line in csvarr:
                f.write(",".join(line))

        # Transform lists to arrays for downstream application.
        self.lats = np.array(self.lats)
        self.lons = np.array(self.lons)
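A note on the orient() call in the method above: the GBIF API expects WKT polygons whose exterior ring winds counter-clockwise, so the geometry is reoriented before searching. A minimal sketch of that step in isolation, assuming shapely is installed (the bounding box is an arbitrary example):

# Minimal sketch: reorient a polygon counter-clockwise before passing
# its WKT to occ.search(geometry=...).
import shapely.geometry

poly = shapely.geometry.box(-10.0, 40.0, 5.0, 50.0)  # arbitrary box
ccw = shapely.geometry.polygon.orient(poly, 1.0)     # sign=1.0 -> CCW exterior ring
print(ccw.wkt)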
Example no. 27
def hogSearch(left, right, top, bottom):
    l = left
    r = right
    t = top
    b = bottom
    coords = latStat(l, r, t, b)
    hog = occ.search(decimalLatitude=coords[0],
                     decimalLongitude=coords[1],
                     scientificName='Sus scrofa')
    print(hog['count'])
    dist = 0
    while hog['count'] == 0:
        dist += 0.7
        l -= .01
        r += .01
        t += .01
        b -= .01
        coords = latStat(l, r, t, b)
        print(coords[0], coords[1])
        hog = occ.search(decimalLatitude=coords[0],
                         decimalLongitude=coords[1],
                         scientificName='Sus scrofa')

    hogHistoric = []
    hog2019 = []
    nDic = hog['results']
    print(nDic)

    for sightings in nDic:
        si = sightings["eventDate"]
        hogHistoric.append(si)
        if '2019' in sightings["eventDate"]:
            hog2019.append(si)

    pigs2019 = []
    for hd in hog2019:
        nd = hd[:10]
        dd = nd.split('-', 2)
        pigs2019.append(datetime(int(dd[0]), int(dd[1]), int(dd[2])))

    allpigs = []
    for allHog in hogHistoric:
        nd = allHog[:10]
        dd = nd.split('-', 2)
        allpigs.append(datetime(int(dd[0]), int(dd[1]), int(dd[2])))

    if pigs2019 != []:
        dayDif = str(
            round(
                (abs(datetime.now() - max(pigs2019)).total_seconds()) / 86400,
                2))
        bigDay = str(max(pigs2019))[:10]
    else:
        dayDif = str(
            round((abs(datetime.now() - max(allpigs)).total_seconds()) / 86400,
                  2))
        bigDay = str(max(allpigs))[:10]

    print(dayDif)
    print(bigDay)

    str1 = "WARNING: WE FOUND " + str(
        len(hogHistoric)) + " HOG RECORDS IN YOUR AREA. "
    str2 = "MOST RECENT HOG WAS SEEN " + dayDif + " DAYS AGO. CHOOSE A MORE SPECIFIC LOCATION FOR BETTER RESULTS."
    pigPic = []
    for ss in nDic:
        print(ss["eventDate"][:10])
        if ss["eventDate"][:10] == str(bigDay):
            pigPic = ss["media"]
            break
    str3 = pigPic[0]['identifier'] if pigPic else ''  # guard against records without media

    if dist != 0:
        totS = "WARNING: A HOG WAS SPOTTED " + str(
            dist) + " MILES FROM YOU " + dayDif + " DAYS AGO. "
    else:
        totS = str1 + str2

    hogDic = [totS, str3]
    return hogDic
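The date handling above slices the first ten characters of eventDate and splits on '-'; datetime.strptime expresses the same parse more directly. A minimal sketch using only the standard library:

# Minimal sketch: parse the leading YYYY-MM-DD of a GBIF eventDate string.
from datetime import datetime

def parse_event_date(event_date):
    return datetime.strptime(event_date[:10], '%Y-%m-%d')

print(parse_event_date('2019-07-04T12:30:00'))  # 2019-07-04 00:00:00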
Example no. 28
df0 = pd.DataFrame()

reclst = []
lstcols = ['ScientificName', 'nRecords']

print('+' * 60)
print('\n')

for spp in sppList:
    print('Working on the following species:', spp)
    # First use the species module to get the taxonKey for a species scientific name
    tkey = species.name_backbone(name=spp, rank='species')['usageKey']
    # Gather the occurrences dictionary using the appropriate criteria
    recs = occ.search(taxonKey=tkey,
                      hasCoordinate=True,
                      country='US',
                      hasGeospatialIssue=False)

    if 'count' in recs:
        cnt = recs['count']
        print('  This species has', cnt, 'records')
    else:
        print('  This species has an UNKNOWN RECORD COUNT')
        cnt = -99
    reclst.append([spp, cnt])

print('+' * 60)

# Make a dataframe out of the compiled lists and save as CSV
dfRecordCount = pd.DataFrame(data=reclst, columns=lstcols)
dfRecordCount.to_csv(workDir + "SpeciesRecordCountGBIF.csv")
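When only the number of records matters, asking the search API for limit=0 should return the count without transferring any result pages. A minimal sketch, assuming pygbif, network access, and the limit=0 behavior of the search endpoint (the taxon is an arbitrary example):

# Minimal sketch: fetch only the record count for a taxon.
from pygbif import occurrences, species

tkey = species.name_backbone(name='Accipiter cooperii', rank='species')['usageKey']
n = occurrences.search(taxonKey=tkey, hasCoordinate=True, country='US', limit=0)['count']
print(n)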
Example no. 29
alloccs2 = []
for x in alloccs:
    alloccs2.append(dict((y, x[y]) for y in x if y in keykeys))

##################################################  FILTER MORE
###############################################################

#  COORDINATE UNCERTAINTY
sql_green = """SELECT has_coordinate_uncertainty FROM gbif_filters
               WHERE filter_id = '{0}';""".format(config.gbif_filter_id)
filt_coordUncertainty = cursor2.execute(sql_green).fetchone()[0]

if filt_coordUncertainty == 1:
    alloccs3 = [
        x for x in alloccs2 if 'coordinateUncertaintyInMeters' in x.keys()
    ]
if filt_coordUncertainty == 0:
    alloccs3 = alloccs2

#___________________________________8
test_occs = occurrences.search(taxonKey=gbif_id,
                               year=years,
                               month='1,12',
                               decimalLatitude=latRange,
                               decimalLongitude=lonRange,
                               hasGeospatialIssue=geoIssue,
                               hasCoordinate=True,
                               continent=continent)
occ_count = test_occs['count']
print('{0} records exist'.format(occ_count))
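The call above passes decimalLatitude and decimalLongitude as 'min,max' range strings rather than single values. A minimal sketch with arbitrary bounds, assuming pygbif and network access:

# Minimal sketch: range filters are passed as "min,max" strings.
from pygbif import occurrences

res = occurrences.search(taxonKey=2435099,          # arbitrary example taxon key
                         decimalLatitude='30,45',
                         decimalLongitude='-110,-90',
                         hasCoordinate=True,
                         limit=0)
print(res['count'])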
Example no. 30
def test_search():
    "occurrences.search - basic test"
    res = occurrences.search(taxonKey=3329049)
    assert 'dict' == res.__class__.__name__
    assert 6 == len(res)
    assert sorted(keyz) == sorted(res.keys())
Example no. 31
def test_search_():
    "occurrences.search - diff taxonKey"
    res = occurrences.search(taxonKey=252408386)
    assert 'dict' == res.__class__.__name__
    assert 24 == len(res)
    assert 252408386 == res['key']
Example no. 32
def gbif_serach(taxon_name):
    """
    API to GBIF database.
    :param taxon_name: Scientific name of tree species (e.g. 'Fagus sylvatica')
    """

    try:
        from pygbif import occurrences as occ
        from pygbif import species

        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]

        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('Taxon Key found: %s' % nm)
    except Exception as e:
        logger.error('Taxon Key defining failed %s' % (e))

    try:
        results = []
        for i, p in enumerate(polys):
            res = []
            x = occ.search(taxonKey=nm, geometry=p)
            res.append(x['results'])
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm,
                               geometry=p,
                               offset=sum(len(r) for r in res))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
        logger.info('***** GBIF data fetching done! ***** ')
    except Exception as e:
        logger.error('Coordinate fetching failed: %s' % (e))

    try:
        allres = [w for z in results for w in z]
        coords = [{k: v
                   for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]

        from numpy import empty
        latlon = empty([len(coords), 2], dtype=float, order='C')

        for i, coord in enumerate(coords):
            latlon[i][0] = coord['decimalLatitude']
            latlon[i][1] = coord['decimalLongitude']

        logger.info('read in PA coordinates for %s rows ' % len(latlon[:, 0]))
    except Exception as e:
        logger.error('failed to search GBIF data %s' % (e))
    return latlon
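The offset/endOfRecords loop in gbif_serach recurs throughout these examples and can be factored into a generator. A minimal sketch, assuming pygbif (300 is the search API's per-page maximum):

# Minimal sketch: page through every occurrence record for a taxon key.
from pygbif import occurrences as occ

def iter_occurrences(taxon_key, **filters):
    offset = 0
    while True:
        page = occ.search(taxonKey=taxon_key, offset=offset, limit=300, **filters)
        for rec in page['results']:
            yield rec
        if page['endOfRecords']:
            break
        offset += page['limit']

Usage mirrors the loop above, e.g. records = list(iter_occurrences(nm, geometry=polys[0])).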
Example no. 33
def test_search():
    "occurrences.search - basic test"
    res = occurrences.search(taxonKey=3329049)
    assert 'dict' == res.__class__.__name__
    assert 5 == len(res)
    assert sorted(['count', 'endOfRecords', 'limit', 'results', 'offset']) == sorted(res.keys())
Example no. 34
def test_search_identified_by_id():
    "occurrences.search - identifiedByID"
    res = occurrences.search(identifiedByID=x, limit=3)
    assert "dict" == res.__class__.__name__
    assert 6 == len(res)
    assert x == res["results"][0]["identifiedByIDs"][0]['value']
Example no. 35
from pygbif import species, occurrences
from numpy import nan, empty 
TName = "Fagus sylvatica"
key = species.name_backbone(name=TName, rank="species")["usageKey"]
n = occurrences.count(taxonKey=key, isGeoreferenced=True)

# Cap the request at the search API's 200,000-record limit.
if n > 200000:
    max_records = 200000
else:
    max_records = n
results = occurrences.search(taxonKey=key, limit=max_records)

print('(', key, ')', '-', format(n, ','), ' occurrence(s)')

# lonslats = []
latlon = empty([max_records, 2], dtype=float, order='C')

for i, x in enumerate(results["results"]):
    #try:
        #if x['continent'].find('_') != -1:
            #Continent = ' '.join(x['continent'].split('_')).title()
        #else:
            #Continent = x['continent'].capitalize()
    #except:
            #Continent = ""  
    #try:
        #Country = x['country']
    #except:
        #Country = ""
           
    #try:
Example no. 36
def main():

    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(_("Cannot import pygbif (https://github.com/sckott/pygbif)"
                      " library."
                      " Please install it (pip install pygbif)"
                      " or ensure that it is on path"
                      " (use PYTHONPATH variable)."))

    # Parse input options
    output = options['output']
    mask = options['mask']
    species_maps = flags['i']
    no_region_limit = flags['r']
    no_topo = flags['b']
    print_species = flags['p']
    print_species_table = flags['t']
    print_species_shell = flags['g']
    print_occ_number = flags['o']
    allow_no_geom = flags['n']
    hasGeoIssue = flags['s']
    taxa_list = options['taxa'].split(',')
    institutionCode = options['institutioncode']
    basisofrecord = options['basisofrecord']
    recordedby = options['recordedby'].split(',')
    date_from = options['date_from']
    date_to = options['date_to']
    country = options['country']
    continent = options['continent']
    rank = options['rank']

    # Define static variable
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj string
    latlon_crs = ['+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000',
                  '+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0']
    # List of attributes available in Darwin Core.
    # Not all attributes are returned in each request, so to avoid key
    # errors when accessing the dictionary returned by pygbif, the presence
    # of DWC keys in the returned dictionary is checked against this list.
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    dwc_keys = ['key', 'taxonRank', 'taxonKey', 'taxonID', 'scientificName',
                'species', 'speciesKey', 'genericName', 'genus', 'genusKey',
                'family', 'familyKey', 'order', 'orderKey', 'class',
                'classKey', 'phylum', 'phylumKey', 'kingdom', 'kingdomKey',
                'eventDate', 'verbatimEventDate', 'startDayOfYear',
                'endDayOfYear', 'year', 'month', 'day', 'occurrenceID',
                'occurrenceStatus', 'occurrenceRemarks', 'Habitat',
                'basisOfRecord', 'preparations', 'sex', 'type', 'locality',
                'verbatimLocality', 'decimalLongitude', 'decimalLatitude',
                'geodeticDatum', 'higherGeography', 'continent', 'country',
                'countryCode', 'stateProvince', 'gbifID', 'protocol',
                'identifier', 'recordedBy', 'identificationID', 'identifiers',
                'dateIdentified', 'modified', 'institutionCode',
                'lastInterpreted', 'lastParsed', 'references', 'relations',
                'catalogNumber', 'occurrenceDetails', 'datasetKey',
                'datasetName', 'collectionCode', 'rights', 'rightsHolder',
                'license', 'publishingOrgKey', 'publishingCountry',
                'lastCrawled', 'specificEpithet', 'facts', 'issues',
                'extensions', 'language']
    # Define columns for attribute table
    cols = [('cat',       'INTEGER PRIMARY KEY'),
            ('g_search',       'varchar(100)'),
            ('g_key',       'integer'),
            ('g_taxonrank',       'varchar(50)'),
            ('g_taxonkey',       'integer'),
            ('g_taxonid',       'varchar(50)'),
            ('g_scientificname',       'varchar(255)'),
            ('g_species',       'varchar(255)'),
            ('g_specieskey',       'integer'),
            ('g_genericname',       'varchar(255)'),
            ('g_genus',       'varchar(50)'),
            ('g_genuskey',       'integer'),
            ('g_family',       'varchar(50)'),
            ('g_familykey',       'integer'),
            ('g_order',       'varchar(50)'),
            ('g_orderkey',       'integer'),
            ('g_class',       'varchar(50)'),
            ('g_classkey',       'integer'),
            ('g_phylum',       'varchar(50)'),
            ('g_phylumkey',       'integer'),
            ('g_kingdom',       'varchar(50)'),
            ('g_kingdomkey',       'integer'),
            ('g_eventdate',       'text'),
            ('g_verbatimeventdate',       'varchar(50)'),
            ('g_startDayOfYear',       'integer'),
            ('g_endDayOfYear',       'integer'),
            ('g_year',       'integer'),
            ('g_month',       'integer'),
            ('g_day',       'integer'),
            ('g_occurrenceid',       'varchar(255)'),
            ('g_occurrenceStatus',       'varchar(50)'),
            ('g_occurrenceRemarks',       'varchar(50)'),
            ('g_Habitat',       'varchar(50)'),
            ('g_basisofrecord',       'varchar(50)'),
            ('g_preparations',       'varchar(50)'),
            ('g_sex',       'varchar(50)'),
            ('g_type',       'varchar(50)'),
            ('g_locality',       'varchar(255)'),
            ('g_verbatimlocality',       'varchar(255)'),
            ('g_decimallongitude',       'double precision'),
            ('g_decimallatitude',       'double precision'),
            ('g_geodeticdatum',       'varchar(50)'),
            ('g_higherGeography',       'varchar(255)'),
            ('g_continent',       'varchar(50)'),
            ('g_country',       'varchar(50)'),
            ('g_countryCode',       'varchar(50)'),
            ('g_stateProvince',       'varchar(50)'),
            ('g_gbifid',       'varchar(255)'),
            ('g_protocol',       'varchar(255)'),
            ('g_identifier',       'varchar(50)'),
            ('g_recordedby',       'varchar(255)'),
            ('g_identificationid',       'varchar(255)'),
            ('g_identifiers',       'text'),
            ('g_dateidentified',       'text'),
            ('g_modified',       'text'),
            ('g_institutioncode',       'varchar(50)'),
            ('g_lastinterpreted',       'text'),
            ('g_lastparsed',       'text'),
            ('g_references',       'varchar(255)'),
            ('g_relations',       'text'),
            ('g_catalognumber',       'varchar(50)'),
            ('g_occurrencedetails',       'text'),
            ('g_datasetkey',       'varchar(50)'),
            ('g_datasetname',       'varchar(255)'),
            ('g_collectioncode',       'varchar(50)'),
            ('g_rights',       'varchar(255)'),
            ('g_rightsholder',       'varchar(255)'),
            ('g_license',       'varchar(50)'),
            ('g_publishingorgkey',       'varchar(50)'),
            ('g_publishingcountry',       'varchar(50)'),
            ('g_lastcrawled',       'text'),
            ('g_specificepithet',       'varchar(50)'),
            ('g_facts',       'text'),
            ('g_issues',       'text'),
            ('g_extensions',       'text'),
            ('g_language',       'varchar(50)')]

    set_output_encoding()
    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

        if date_from and not date_to:
            eventDate = '{}'.format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = '{},{}'.format(date_from, date_to)
        else:
            grass.fatal("Invalid date range: End date has to be after start date!")
    # Set filter on basisOfRecord if requested by user
    if basisofrecord == 'ALL':
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    target_crs = grass.read_command('g.proj', flags='fj').rstrip(os.linesep)
    target = osr.SpatialReference(target_crs)
    target.ImportFromProj4(target_crs)
    if target_crs == 'XY location (unprojected)':
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split('@')) == 2:
            m = VectorTopo(mask.split('@')[0], mapset=mask.split('@')[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal('Could not find vector map <{}>'.format(mask))
        m.open('r')
        if not m.is_open():
            grass.fatal('Could not open vector map <{}>'.format(mask))

        # Use the map's bounding box as spatial filter unless the map contains exactly one area
        if m.number_of('areas') == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = str(m.bbox()).replace('Bbox(', '').replace(' ', '').rstrip(')').split(',')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(bbox[2],
                         bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal('Import of data from outside the current region is '
                            'only supported in a WGS84 location!')
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid projection errors
            region = grass.parse_command('g.region', flags='g')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(region['e'],
                         region['n'], region['w'], region['s'])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        mapname = output
        new = Vector(mapname)
        new.open('w', tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if it was not provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match['usageKey']
            except:
                grass.error('Data request for taxon {} failed. Are you online?'.format(s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print('Matching taxon for {} is:'.format(s))
            print('{} {}'.format(species_match['scientificName'], species_match['status']))
            if 'alternatives' in list(species_match.keys()):
                print('Alternative matches might be:')
                for m in species_match['alternatives']:
                    print('{} {}'.format(m['scientificName'], m['status']))
            else:
                print('No alternatives found for the given taxon')
            continue
        if print_species_shell:
            print('match={}'.format(species_match['scientificName']))
            if 'alternatives' in list(species_match.keys()):
                alternatives = []
                for m in species_match['alternatives']:
                    alternatives.append(m['scientificName'])
                print('alternatives={}'.format(','.join(alternatives)))
            continue
        if print_species_table:
            if 'alternatives' in list(species_match.keys()):
                if len(species_match['alternatives']) == 0:
                    print('{0}|{1}|{2}|'.format(s, key, species_match['scientificName']))
                else:
                    alternatives = []
                    for m in species_match['alternatives']:
                        alternatives.append(m['scientificName'])
                    print('{0}|{1}|{2}|{3}'.format(s, key, species_match['scientificName'],
                                                    ','.join(alternatives)))
            continue
        try:
            returns_n = occurrences.search(taxonKey=key,
                                           hasGeospatialIssue=hasGeospatialIssue,
                                           hasCoordinate=hasCoordinate,
                                           institutionCode=institutionCode,
                                           basisOfRecord=basisOfRecord,
                                           recordedBy=recordedby,
                                           eventDate=eventDate,
                                           continent=continent,
                                           country=country,
                                           geometry=pol,
                                           limit=1)['count']
        except:
            grass.error('Data request for taxon {} failed. Are you online?'.format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            grass.message('Found {0} occurrences for taxon {1}...'.format(returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning('No occurrences for current search for taxon {0}...'.format(s))
            continue
        elif returns_n >= 200000:
            grass.warning('Your search for {1} returns {0} records.\n'
                          'Unfortunately, the GBIF search API is limited to 200,000 records per request.\n'
                          'The download will be incomplete. Please consider splitting up your search.'.format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose('Downloading {0} occurrences for taxon {1}...'.format(returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = '{}_{}'.format(s.replace(' ', '_'), output)

            new = Vector(mapname)
            new.open('w', tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(taxonKey=key,
                                         hasGeospatialIssue=hasGeospatialIssue,
                                         hasCoordinate=hasCoordinate,
                                         institutionCode=institutionCode,
                                         basisOfRecord=basisOfRecord,
                                         recordedBy=recordedby,
                                         eventDate=eventDate,
                                         continent=continent,
                                         country=country,
                                         geometry=pol,
                                         limit=chunk_size,
                                         offset=offset)

            # Write the returned data to map and attribute table
            for res in returns['results']:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt('POINT ({} {})'.format(res['decimalLongitude'], res['decimalLatitude']))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res['decimalLongitude']
                    y = res['decimalLatitude']

                point = Point(x, y)

                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(point, cat=cat, attrs=(
                          '{}'.format(s),
                          res['key'],
                          res['taxonRank'],
                          res['taxonKey'],
                          res['taxonID'],
                          res['scientificName'],
                          res['species'],
                          res['speciesKey'],
                          res['genericName'],
                          res['genus'],
                          res['genusKey'],
                          res['family'],
                          res['familyKey'],
                          res['order'],
                          res['orderKey'],
                          res['class'],
                          res['classKey'],
                          res['phylum'],
                          res['phylumKey'],
                          res['kingdom'],
                          res['kingdomKey'],
                          '{}'.format(res['eventDate']) if res['eventDate'] else None,
                          '{}'.format(res['verbatimEventDate']) if res['verbatimEventDate'] else None,
                          res['startDayOfYear'],
                          res['endDayOfYear'],
                          res['year'],
                          res['month'],
                          res['day'],
                          res['occurrenceID'],
                          res['occurrenceStatus'],
                          res['occurrenceRemarks'],
                          res['Habitat'],
                          res['basisOfRecord'],
                          res['preparations'],
                          res['sex'],
                          res['type'],
                          res['locality'],
                          res['verbatimLocality'],
                          res['decimalLongitude'],
                          res['decimalLatitude'],
                          res['geodeticDatum'],
                          res['higherGeography'],
                          res['continent'],
                          res['country'],
                          res['countryCode'],
                          res['stateProvince'],
                          res['gbifID'],
                          res['protocol'],
                          res['identifier'],
                          res['recordedBy'],
                          res['identificationID'],
                          ','.join(res['identifiers']),
                          '{}'.format(res['dateIdentified']) if res['dateIdentified'] else None,
                          '{}'.format(res['modified']) if res['modified'] else None,
                          res['institutionCode'],
                          '{}'.format(res['lastInterpreted']) if res['lastInterpreted'] else None,
                          '{}'.format(res['lastParsed']) if res['lastParsed'] else None,
                          res['references'],
                          ','.join(res['relations']),
                          res['catalogNumber'],
                          '{}'.format(res['occurrenceDetails']) if res['occurrenceDetails'] else None,
                          res['datasetKey'],
                          res['datasetName'],
                          res['collectionCode'],
                          res['rights'],
                          res['rightsHolder'],
                          res['license'],
                          res['publishingOrgKey'],
                          res['publishingCountry'],
                          '{}'.format(res['lastCrawled']) if res['lastCrawled'] else None,
                          res['specificEpithet'],
                          ','.join(res['facts']),
                          ','.join(res['issues']),
                          ','.join(res['extensions']),
                          res['language'],))

                cat = cat + 1

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command('v.build', map=mapname, option='build')

    # Close the output map if not a map for each species is requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command('v.build', map=mapname, option='build')
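The chunk arithmetic above rounds the record count up to whole request pages; a worked check in pure Python:

# Minimal sketch: number of request pages needed for N records at a page size.
import math

returns_n, chunk_size = 1234, 300
chunks = int(math.ceil(returns_n / float(chunk_size)))
print(chunks)  # 5: four full pages of 300 plus one final page of 34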
Example no. 37
"POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
"POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",      
"POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",   
"POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
"POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",     
"POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",     
"POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",   
"POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", 
"POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",      
"POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",     
"POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]

results = []
for i in polys:
    res = []
    x = occ.search(taxonKey = nm, geometry = i)
    res.append(x['results'])
    while not x['endOfRecords']:
        x = occ.search(taxonKey = nm, geometry = i, offset = sum(len(r) for r in res))
        res.append(x['results'])

    results.append([w for z in res for w in z])
    print('polygon done')
       
allres = [w for z in results for w in z]
coords = [{k: v for k, v in w.items() if k.startswith('decimal')} for w in allres]

from numpy import empty
latlon = empty([len(coords), 2], dtype=float, order='C')

for i, coord in enumerate(coords):
Example no. 38
def test_search():
    "occurrences.search - basic test"
    res = occurrences.search(taxonKey=3329049)
    assert 'dict' == res.__class__.__name__
    assert 6 == len(res)
    assert sorted(keyz) == sorted(res.keys())
Example no. 39
print("*" * 40)

sppList = ['Accipiter cooperii', 'Myodes gapperi']

# Make an empty list to append each species' records
reclst = []
# Make column names for the list
lstcols = ['SppName', 'nRecords']

n = 0
# Loop over each species in the full species list in the config file
for spp in config.sciNames1590:

    print('Working on the following species:', spp)
    recs = occ.search(scientificName=spp,
                      hasCoordinate=True,
                      country='US',
                      hasGeospatialIssue=False)
    # Not all species have 'count' in their occurrence record dictionary;
    # check for it and otherwise fall back to the 0.9 sentinel.
    if 'count' in recs:
        cnt = recs['count']
        n = n + cnt
        print('  it has', cnt, 'records')
    else:
        print('  it has an UNKNOWN NUMBER of records')
        cnt = 0.9
    # Append to the record list
    reclst.append([spp, cnt])

print('\n   TOTAL NUMBER OF RECORDS FOR THIS SPECIES LIST =', n)
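The presence check on 'count' can be collapsed with dict.get while keeping the same sentinel. A minimal, self-contained sketch:

# Minimal sketch: dict.get replaces the 'count' presence check.
recs = {'offset': 0, 'results': []}  # example response missing 'count'
cnt = recs.get('count', 0.9)         # same sentinel as above
print(cnt)                           # 0.9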
Example no. 40
    "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
    "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
    "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
    "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
    "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
    "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
    "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
    "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
    "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
    "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
]

results = []
for i in polys:
    res = []
    x = occ.search(taxonKey=nm, geometry=i)
    res.append(x['results'])
    while not x['endOfRecords']:
        x = occ.search(taxonKey=nm,
                       geometry=i,
                       offset=sum(len(r) for r in res))
        res.append(x['results'])

    results.append([w for z in res for w in z])
    print('polygon done')

allres = [w for z in results for w in z]
coords = [{k: v
           for k, v in w.items() if k.startswith('decimal')} for w in allres]

from numpy import empty
Example no. 41
    # Initial Call
    printProgressBar(0)
    # Update Progress Bar
    for i, item in enumerate(iterable):
        yield item
        printProgressBar(i + 1)
    # Print New Line on Complete
    print()

# main
limitmb = 512
procs = 6
datasetsDir = './datasets/'
indexDir = './index/'

results = occ.search(dwca_extension="http://rs.tdwg.org/dwc/terms/ResourceRelationship", limit=0, facet="datasetKey", facetLimit=1000)

ix = get_index(indexDir)

for r in progressBar(results['facets'][0]['counts'], prefix="Progress", suffix="Complete"):
    datasetKey = r['name']
    dwca_file = f'{datasetsDir}{datasetKey}.zip'

    if not os.path.isfile(dwca_file):
        try:
            dataset = registry.datasets(uuid=datasetKey)
            dwca_endpoints = [e for e in dataset['endpoints'] if e['type'] == 'DWC_ARCHIVE']
            if len(dwca_endpoints) > 0:
                url = dwca_endpoints[0]['url']
                req = requests.get(url, stream=True)
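The fragment above ends just after opening a streaming request; the usual continuation writes the archive to disk in chunks. A minimal sketch, assuming requests is installed (the URL and path are hypothetical examples):

# Minimal sketch: stream a Darwin Core archive to disk in chunks.
import requests

url = 'https://example.org/archive.zip'  # hypothetical endpoint URL
dwca_file = './datasets/example.zip'     # hypothetical local path
req = requests.get(url, stream=True)
req.raise_for_status()
with open(dwca_file, 'wb') as f:
    for chunk in req.iter_content(chunk_size=8192):
        f.write(chunk)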
Example no. 42
def test_search_key2():
    "occurrences.search - diff taxonKey2"
    res = occurrences.search(taxonKey=2683264)
    assert "dict" == res.__class__.__name__
    assert 6 == len(res)
    assert 2683264 == res["results"][0]["taxonKey"]