Example #1
File: sdm.py Project: KatiRG/flyingpigeon
def gbif_serach(taxon_name):
  """
  Query the GBIF database for occurrence records of a taxon.

  :param taxon_name: scientific name of a tree species (e.g. 'Fagus sylvatica')
  """
  
  try:
    from pygbif import occurrences as occ
    from pygbif import species


    polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
    "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
    "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",      
    "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",   
    "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
    "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",     
    "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",     
    "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",   
    "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", 
    "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",      
    "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",     
    "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]


    nm = species.name_backbone(taxon_name)['usageKey']
    logger.info('Taxon key found: %s' % nm)
  except Exception as e:
    logger.error('Taxon key lookup failed: %s' % e)
  
  try:
    results = []
    for i, p in enumerate(polys):
      res = []
      x = occ.search(taxonKey=nm, geometry=p)
      res.append(x['results'])
      while not x['endOfRecords']:
          # offset by the number of records fetched so far
          x = occ.search(taxonKey=nm, geometry=p, offset=sum(len(r) for r in res))
          res.append(x['results'])
      results.append([w for z in res for w in z])
      logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
    logger.info('***** GBIF data fetching done! ***** ')    
  except Exception as e:
    logger.error('Coordinate fetching failed: %s' % (e))
   
  try:
    allres = [w for z in results for w in z]
    coords = [ { k: v for k, v in w.items() if k.startswith('decimal') } for w in allres ]

    from numpy import empty
    latlon = empty([len(coords),2], dtype=float, order='C')

    for i, coord in enumerate(coords):
      latlon[i][0] = coord['decimalLatitude']
      latlon[i][1] = coord['decimalLongitude']

    logger.info('read in PA coordinates for %s rows' % len(latlon))
  except Exception as e:
    logger.error('GBIF data search failed: %s' % e)
  return latlon
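
All of the paging loops in these examples follow the same pattern: request a page, collect page['results'], and advance the offset until GBIF reports endOfRecords. A minimal consolidated sketch of that pattern (the helper name fetch_all_occurrences is ours, not part of pygbif):

from pygbif import occurrences as occ

def fetch_all_occurrences(taxon_key, **search_kwargs):
    """Page through occ.search until GBIF reports endOfRecords."""
    records = []
    end_of_records = False
    while not end_of_records:
        page = occ.search(taxonKey=taxon_key, offset=len(records), **search_kwargs)
        records.extend(page['results'])
        end_of_records = page['endOfRecords']
    return records
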
Example #2
    def find_species_occurrences(self, **kwargs):
        """
        Finds and loads species occurrence data into pandas DataFrame.
        Data comes from the GBIF database, based on name or gbif ID
        the occurrences.search(...) returns a list of json structures
        which we load into Pandas DataFrame for easier manipulation.

        """

        try:
            species_result = species.name_backbone(name=self.name_species,
                                                   verbose=False)
            if species_result['matchType'] == 'NONE':
                raise ValueError("No match for the species %s " %
                                 self.name_species)
            self.ID = species_result['usageKey']
            first_res = occurrences.search(taxonKey=self.ID,
                                           limit=100000,
                                           **kwargs)

        except AttributeError:  # name not provided, assume at least ID is provided
            first_res = occurrences.search(taxonKey=self.ID,
                                           limit=100000,
                                           **kwargs)

        #TODO: more efficient way than copying...appending to the same dataframe?

        full_results = copy.copy(first_res)

        # results are paginated, so we need a loop to fetch them all;
        # GBIF serves at most 300 records per request, hence the offset step
        counter = 1
        while first_res['endOfRecords'] is False:
            first_res = occurrences.search(taxonKey=self.ID,
                                           offset=300 * counter,
                                           limit=10000)
            full_results['results'] = copy.copy(
                full_results['results']) + copy.copy(first_res['results'])
            counter += 1

        logger.info("Loading species ... ")
        logger.info("Number of occurrences: %s " % full_results['count'])
        logger.debug(full_results['count'] == len(
            full_results['results']))  # match?

        #TODO: do we want a special way of loading? say, suggesting data types in some columns?

        #TODO: should we reformat the dtypes of the columns? at least day/month/year we care?
        #data_cleaned[['day', 'month', 'year']] = data_cleaned[['day', 'month', 'year']].fillna(0.0).astype(int)

        self.data_full = pd.DataFrame(
            full_results['results'])  # load results in pandas dataframes
        if self.data_full.empty:
            logger.info("Could not retrieve any occurrences!")
        else:
            logger.info("Loaded species: %s " %
                        self.data_full['species'].unique())
        return self.data_full
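
The first TODO above (appending without repeated copies) can be addressed by collecting the paginated results in a plain list and constructing the DataFrame once at the end. A minimal sketch under that assumption (occurrences_to_dataframe is a hypothetical helper, not part of this class):

import pandas as pd
from pygbif import occurrences

def occurrences_to_dataframe(taxon_key, **kwargs):
    """Hypothetical helper: page GBIF results into a single DataFrame."""
    records = []
    end_of_records = False
    while not end_of_records:
        page = occurrences.search(taxonKey=taxon_key, offset=len(records), **kwargs)
        records.extend(page['results'])
        end_of_records = page['endOfRecords']
    return pd.DataFrame(records)
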
Example #3
def SearchGBIF(st):

    import re
    import requests
    import numpy as np
    from pygbif import species

    gbifTxt = "http://api.gbif.org/v1/"
    urlName = gbifTxt + "species/search?q=" + st + "&rank=SPECIES"
    jName = requests.get(urlName).json()
    r = jName['results']
    # Use pygbif to get the GBIF taxon key (usageKey)
    gid = str(species.name_backbone(name=st, rank='species')['usageKey'])

    # Find index values for all variables if they exist otherwise make them None
    # vernacular name is a little less straight forward - see below
    nameidx = next((i for i, d in enumerate(r) if 'scientificName' in d), None)
    origidx = next((i for i, d in enumerate(r) if 'origin' in d), None)
    authidx = next((i for i, d in enumerate(r) if 'authorship' in d), None)
    try:
        # Get scientific name
        if nameidx is not None:
            nameGBIF = r[nameidx]['scientificName']
        else:
            nameGBIF = 'N/A'
    except (IndexError, KeyError):
        nameGBIF = 'N/A'
    try:
        # Get origin
        if origidx is not None:
            nameOrig = r[origidx]['origin']
        else:
            nameOrig = 'N/A'
    except (IndexError, KeyError):
        nameOrig = 'N/A'
    try:
        # Get common name
        # This is a little trickier because the vernacularNames key
        # holds a list which may be empty; take the first non-empty one
        cnGBIF = 'N/A'
        for item in r:
            if item.get('vernacularNames'):
                cnGBIF = item['vernacularNames'][0]['vernacularName']
                break
    except (IndexError, KeyError):
        cnGBIF = 'N/A'
    try:
        # Get authorship year
        if authidx is not None:
            auth = r[authidx]['authorship']
            yrGBIF = int(re.search(pattern=r'(\d{4})', string=auth).group())
        else:
            yrGBIF = np.nan
    except (IndexError, KeyError, AttributeError):
        yrGBIF = np.nan

    # Return a tuple of information from the API search
    return gid, nameGBIF, cnGBIF, nameOrig, yrGBIF
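
A quick usage sketch for the function above (the species name is only an illustration; actual return values depend on the live GBIF index):

# hypothetical call: look up a species and unpack the returned tuple
gid, nameGBIF, cnGBIF, nameOrig, yrGBIF = SearchGBIF("Quercus robur")
print(gid, nameGBIF, cnGBIF, nameOrig, yrGBIF)
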
Example #4
File: Mapper.py Project: pmckenz1/smood
    def _get_gbif_occs(self):
        # build the output csv path for this species
        self.occfile = os.path.join(
            self.outputs_dir,
            self.profile['spname'].replace(" ", "_") + ".csv")
        # get the gbif key for our species
        if not self.key:
            self.key = species.name_backbone(name=self.profile['spname'],
                                             rank='species')['usageKey']

        # make lists to fill
        self.lats = []
        self.lons = []

        # cycle through observations, filling lists of lat and lon
        curr_offset = 0
        end_records = False
        while not end_records:
            occ_records = occ.search(taxonKey=self.key,
                                     hasCoordinate=True,
                                     decimalLatitude=','.join([
                                         str(self.profile['ymin']),
                                         str(self.profile['ymax'])
                                     ]),
                                     decimalLongitude=','.join([
                                         str(self.profile['xmin']),
                                         str(self.profile['xmax'])
                                     ]),
                                     offset=curr_offset)
            end_records = occ_records['endOfRecords']
            curr_offset += occ_records['limit']

            self.lons.extend(
                [i['decimalLongitude'] for i in occ_records['results']])
            self.lats.extend(
                [i['decimalLatitude'] for i in occ_records['results']])

        # prepare array to write to csv
        csvarr = np.vstack([
            np.repeat(self.profile['spname'].replace(" ", "_"),
                      len(self.lons)), self.lons,
            [
                "{}{}".format(a_, b_)
                for a_, b_ in zip(self.lats, np.repeat('\n', len(self.lats)))
            ]
        ]).T
        # write occurrence data to csv
        with open(self.occfile, 'w') as f:
            f.write('Species,Longitude,Latitude\n')
            for line in csvarr:
                f.write(",".join(line))

        # make these easier to work with downstream
        self.lons = np.array(self.lons)
        self.lats = np.array(self.lats)
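
Assembling newline characters into a numpy array, as above, works but is easy to get wrong; a simpler equivalent sketch with the standard csv module (write_occurrences_csv is our name, not part of Mapper.py):

import csv

def write_occurrences_csv(path, spname, lons, lats):
    """Write one row per occurrence: Species,Longitude,Latitude."""
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Species', 'Longitude', 'Latitude'])
        for lon, lat in zip(lons, lats):
            writer.writerow([spname.replace(' ', '_'), lon, lat])
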
Example #5
def get_gbif(taxon_name='Fagus sylvatica', bbox=[-10, -10, 10, 10]):
    """
    fetching species data from GBIF database ( pageing over polygons in Europe )

    :param taxon_name: Taxon name of the species to be searched
                     default='Fagus sylvatica'
    :param bbox: extention of georaphical region to fetch data e.g bbox=[-180,-90,180,90]
    :returns dic: Dictionay of species occurences
    """
    from numpy import arange  # nan, empty,
    from pygbif import occurrences as occ
    from pygbif import species

    logger.info('libs loaded in get_gbif function')

    try:
        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('taxon name set')
        print('taxon name set')
        # generate polygons with gridwidth 10_degree
        # x_len = (bbox[2] - bbox[0] ) / 10
        # y_len = (bbox[3] - bbox[1] ) / 10
        # logger.info('length = %s , %s ' % (x_len, y_len))
        polys = []
        gridlen = 10

        for x in arange(bbox[0], bbox[2], gridlen):
            for y in arange(bbox[1], bbox[3], gridlen):
                print('processing %s, %s' % (x, y))
                poly = "POLYGON ((%s %s,%s %s,%s %s,%s %s,%s %s))" % \
                    (x, y, x, y + gridlen, x + gridlen, y + gridlen, x + gridlen, y, x, y)
                polys.append(poly)
        print(polys)

        logger.info('%s polygons created' % len(polys))
        gbifdic = []

        for i in polys:
            logger.info('processing polygon')
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            while not x['endOfRecords']:
                # offset by the number of records fetched so far
                x = occ.search(taxonKey=nm, geometry=i, offset=sum(len(r) for r in res))
                res.append(x['results'])
            gbifdic.append([w for z in res for w in z])
            logger.info('polygon fetched')

        results = [w for z in gbifdic for w in z]
    except Exception:
        msg = 'failed search GBIF data.'
        logger.exception(msg)
        raise
    return results
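
A usage sketch for get_gbif (the bounding box values are illustrative):

# fetch Fagus sylvatica occurrences over a 30x30 degree window (sketch)
occs = get_gbif(taxon_name='Fagus sylvatica', bbox=[-10, 35, 20, 65])
print('%s occurrence records fetched' % len(occs))
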
Example #6
File: sdm.py Project: KatiRG/flyingpigeon
def gbif_serach(taxon_name):
    from numpy import nan, empty
    from pygbif import occurrences as occ
    from pygbif import species

    try:
        nm = species.name_backbone(taxon_name)['usageKey']

        ## a set of WKT polygons
        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]

        results = []
        for i in polys:
            res = []
            x = occ.search(taxonKey=nm, geometry=i)
            res.append(x['results'])
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm,
                               geometry=i,
                               offset=sum([len(x) for x in res]))
                res.append(x['results'])

            results.append([w for z in res for w in z])
            logger.info('polygon fetched')

        allres = [w for z in results for w in z]
        coords = [{k: v
                   for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]

        latlon = empty([len(coords), 2], dtype=float, order='C')
        for i, coord in enumerate(coords):
            # fall back to 0 so rows without coordinates are dropped below
            latlon[i][0] = coord.get('decimalLatitude', 0)
            latlon[i][1] = coord.get('decimalLongitude', 0)
        nz = (latlon == 0).sum(1)
        ll = latlon[nz == 0, :]
        logger.info('read in PA coordinates for %s rows' % len(ll[:, 0]))
    except Exception as e:
        logger.exception('failed search GBIF data %s' % (e))
    return ll
Example #7
def get_species_name_from_codes(splist):
    '''
    Receive a list of scientific names and look up their GBIF codes.

    Args:
        splist(list): the list of target species

    Returns:
        Dictionary mapping each scientific name to its GBIF usage key.
    '''
    keys = [species.name_backbone(x)['usageKey'] for x in splist]
    species_codes = dict(zip(splist, keys))
    return species_codes
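
Usage sketch (actual usageKey values come from the live GBIF backbone):

codes = get_species_name_from_codes(['Fagus sylvatica', 'Quercus robur'])
print(codes)  # e.g. {'Fagus sylvatica': <usageKey>, 'Quercus robur': <usageKey>}
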
Example #8
    def get_valid_taxid(self, name=None, taxid=None, rank=None, kingdom=None):
        if taxid:  # TODO : Find a way to validate taxid
            return taxid, rank
        self.logger.info(
            f"Look for id of taxon {name} with rank {rank} in GBIF Backbone Taxonomy"
        )
        match = species.name_backbone(
            name=name, rank=rank, kingdom=kingdom, strict=True, verbose=False
        )
        if match["matchType"] == "EXACT":
            if match["synonym"]:
                taxid = match["acceptedUsageKey"]
            else:
                taxid = match["usageKey"]
            rank = match["rank"]
            self.logger.info(
                "Found exact match for taxon {} with id {}".format(name, taxid)
            )
            return taxid, rank
        else:
            self.logger.error("No match for taxon {} : {}".format(name, match))
            return None, rank
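
The same match-validation pattern outside the class, for reference (a sketch; the acceptedUsageKey field is only present when GBIF flags the match as a synonym):

from pygbif import species

# illustrative species; strict matching as in the method above
match = species.name_backbone(name='Fagus sylvatica', rank='species',
                              strict=True, verbose=False)
if match['matchType'] == 'EXACT':
    taxid = match['acceptedUsageKey'] if match['synonym'] else match['usageKey']
    print('taxid:', taxid, 'rank:', match['rank'])
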
Example #9
    def download_area(self, user=None, password=None, email=None):
        if self.interactive:
            input_information = self._get_input_data()
        else:
            input_information = self.information

        # Search for data
        key = sps.name_backbone(name=self.information[0],
                                rank='species')['usageKey']
        if (user is None or password is None):
            user = input('Provide username and press RETURN\n')
            password = input('Provide password and press RETURN\n')
            email = input('Provide email and press RETURN\n')

        tk = 'taxonKey = ' + str(key)
        ct = 'country = ' + self.information[1]
        hascoo = 'hasCoordinate = TRUE'
        hasissues = 'hasGeospatialIssue = False'
        data = occ.download([tk, ct, hascoo, hasissues],
                            user=user,
                            pwd=password,
                            email=email)
        return data
Example #10

    out_dir = './output'
    print("Key not found. 'output_dir' was set to './output'.")
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

print("Downloading NUTS data ...")
# Download NUTS data from Eurostat GISCO API and save to gdf; we use EPSG=3035 because we want an equal-area projection
gdf_aoi_adm = get_nuts(projection="3035")
# if a country_code is provided, extract the subset from only this country
if country_code is not None:
    gdf_aoi_adm = gdf_aoi_adm[gdf_aoi_adm['CNTR_CODE'] == country_code]
gdf_aoi_occs = gdf_aoi_adm.copy()

print("Getting species keys ...")
# get the species keys for the provided species using the species module of the GBIF API
species_keys = [species.name_backbone(x)['usageKey'] for x in species_list]

# iterate over each species, download the data from the GBIF API, write to csv, convert to GeoDataFrame and plot
# the occurrence coloured by the year of observation.
# Finally, add the observation count of each species in each administrative unit to the NUTS GeoDataFrame
# in new columns named after the taxon key
for i, sp in enumerate(species_list):
    print("Processing species nr. %s ..." % (i+1))
    occ_dict = occ.search(scientificName=sp, country=country_code)
    if not occ_dict['results']:
        continue
    occ_gdf = occs_to_gdf(occ_dict)
    # project to the same crs as exported NUTS data ('EPSG:3035')
    occ_gdf.to_crs(epsg=3035, inplace=True)

    occ_gdf.to_csv(os.path.join(out_dir, "gbif_occ_" + occ_dict['results'][0]['scientificName'].
Example #11
import config
import pandas as pd
from pygbif import occurrences as occ
from pygbif import species

sppList = config.sciNames1719

df0 = pd.DataFrame()

reclst = []
lstcols = ['ScientificName', 'nRecords']

print('+' * 60)
print('\n')

for spp in sppList:
    print('Working on the following species:', spp)
    # First use the species module to get the taxonKey for a species scientific name
    tkey = species.name_backbone(name=spp, rank='species')['usageKey']
    # Gather the occurrences dictionary using the appropriate criteria
    recs = occ.search(taxonKey=tkey,
                      hasCoordinate=True,
                      country='US',
                      hasGeospatialIssue=False)

    if 'count' in recs:
        cnt = recs['count']
        print('  This species has', cnt, 'records')
    else:
        print('  This species has an UNKNOWN RECORD COUNT')
        cnt = -99
    reclst.append([spp, cnt])

print('+' * 60)
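
df0 above is created but never filled; presumably the per-species counts collected in reclst are meant to land there. A sketch of that missing last step (our assumption):

# build the summary table from the collected counts (sketch)
df0 = pd.DataFrame(reclst, columns=lstcols)
print(df0)
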
Example #12
from pygbif import species, occurrences
from numpy import nan, empty 
TName = "Fagus sylvatica"
key = species.name_backbone(name=TName, rank="species")["usageKey"]
n = occurrences.count(taxonKey=key, isGeoreferenced=True)

# cap the request at GBIF's 200,000-record search limit
if n > 200000:
    n_max = 200000
else:
    n_max = n
results = occurrences.search(taxonKey=key, limit=n_max)

print('(%s) - %s occurrence(s)' % (key, format(n, ',')))

# lonslats = []
latlon = empty([n_max, 2], dtype=float, order='C')

for i, x in enumerate(results["results"]):
    #try:
        #if x['continent'].find('_') != -1:
            #Continent = ' '.join(x['continent'].split('_')).title()
        #else:
            #Continent = x['continent'].capitalize()
    #except:
            #Continent = ""  
    #try:
        #Country = x['country']
    #except:
        #Country = ""
           
    #try:
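
The example is cut off here. Judging from the preallocated latlon array, the commented-out loop body was extracting coordinates; a sketch of that likely intent (note that a single occurrences.search call returns at most 300 records, so a full extraction would also need the paging pattern from the earlier examples):

# presumable continuation (sketch): fill the array with coordinates,
# falling back to nan for records that lack them
for i, x in enumerate(results["results"]):
    latlon[i][0] = x.get("decimalLatitude", nan)
    latlon[i][1] = x.get("decimalLongitude", nan)
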
Example #13
from pygbif import occurrences as occ
from pygbif import species

nm = species.name_backbone('Fagus sylvatica')['usageKey']

polys = ["POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
"POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
"POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",      
"POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",   
"POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
"POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",     
"POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",     
"POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",   
"POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))", 
"POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",      
"POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",     
"POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"]

results = []
for i in polys:
    res = []
    x = occ.search(taxonKey=nm, geometry=i)
    res.append(x['results'])
    while not x['endOfRecords']:
        # offset by the number of records fetched so far
        x = occ.search(taxonKey=nm, geometry=i, offset=sum(len(r) for r in res))
        res.append(x['results'])

    results.append([w for z in res for w in z])
    print('polygon done')
       
allres = [w for z in results for w in z]
Example #14

    37.83197310811593
]

csvfile = open(fn)
plantData = csv.DictReader(csvfile)
dlList = []

longmin = "decimalLongitude > %s" % boundBox[0]
latmin = "decimalLatitude > %s" % boundBox[1]
longmax = "decimalLongitude < %s" % boundBox[2]
latmax = "decimalLatitude < %s" % boundBox[3]

for row in plantData:
    sp_name = "%s" % row['Scientific Name']
    print(sp_name)
    gbifSpcInfo = species.name_backbone(name=sp_name)
    try:
        print(gbifSpcInfo['usageKey'])
    except:
        print(sp_name, "not found")
        continue

    taxonKeySel = "taxonKey = %s" % gbifSpcInfo['usageKey']
    print(taxonKeySel)
    dl = occ.download([
        taxonKeySel, 'basisOfRecord = HUMAN_OBSERVATION',
        'hasCoordinate = True', latmin, longmin, latmax, longmax
    ])

    dlMeta = occ.download_meta(dl[0])
    print(dlMeta)
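
occ.download only queues the request on the GBIF side; once the archive is ready it can be retrieved with download_get. A sketch (the target path is illustrative):

# fetch the prepared archive for the download key returned above (sketch)
occ.download_get(dl[0], path='.')
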
Example #15
def main():

    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(_("Cannot import pygbif (https://github.com/sckott/pygbif)"
                      " library."
                      " Please install it (pip install pygbif)"
                      " or ensure that it is on path"
                      " (use PYTHONPATH variable)."))

    # Parse input options
    output = options['output']
    mask = options['mask']
    species_maps = flags['i']
    no_region_limit = flags['r']
    no_topo = flags['b']
    print_species = flags['p']
    print_species_table = flags['t']
    print_species_shell = flags['g']
    print_occ_number = flags['o']
    allow_no_geom = flags['n']
    hasGeoIssue = flags['s']
    taxa_list = options['taxa'].split(',')
    institutionCode = options['institutioncode']
    basisofrecord = options['basisofrecord']
    recordedby = options['recordedby'].split(',')
    date_from = options['date_from']
    date_to = options['date_to']
    country = options['country']
    continent = options['continent']
    rank = options['rank']

    # Define static variables
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj string
    latlon_crs = ['+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000',
                  '+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0']
    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    dwc_keys = ['key', 'taxonRank', 'taxonKey', 'taxonID', 'scientificName',
                'species', 'speciesKey', 'genericName', 'genus', 'genusKey',
                'family', 'familyKey', 'order', 'orderKey', 'class',
                'classKey', 'phylum', 'phylumKey', 'kingdom', 'kingdomKey',
                'eventDate', 'verbatimEventDate', 'startDayOfYear',
                'endDayOfYear', 'year', 'month', 'day', 'occurrenceID',
                'occurrenceStatus', 'occurrenceRemarks', 'Habitat',
                'basisOfRecord', 'preparations', 'sex', 'type', 'locality',
                'verbatimLocality', 'decimalLongitude', 'decimalLatitude',
                'geodeticDatum', 'higherGeography', 'continent', 'country',
                'countryCode', 'stateProvince', 'gbifID', 'protocol',
                'identifier', 'recordedBy', 'identificationID', 'identifiers',
                'dateIdentified', 'modified', 'institutionCode',
                'lastInterpreted', 'lastParsed', 'references', 'relations',
                'catalogNumber', 'occurrenceDetails', 'datasetKey',
                'datasetName', 'collectionCode', 'rights', 'rightsHolder',
                'license', 'publishingOrgKey', 'publishingCountry',
                'lastCrawled', 'specificEpithet', 'facts', 'issues',
                'extensions', 'language']
    # Define columns for the attribute table
    cols = [('cat',       'INTEGER PRIMARY KEY'),
            ('g_search',       'varchar(100)'),
            ('g_key',       'integer'),
            ('g_taxonrank',       'varchar(50)'),
            ('g_taxonkey',       'integer'),
            ('g_taxonid',       'varchar(50)'),
            ('g_scientificname',       'varchar(255)'),
            ('g_species',       'varchar(255)'),
            ('g_specieskey',       'integer'),
            ('g_genericname',       'varchar(255)'),
            ('g_genus',       'varchar(50)'),
            ('g_genuskey',       'integer'),
            ('g_family',       'varchar(50)'),
            ('g_familykey',       'integer'),
            ('g_order',       'varchar(50)'),
            ('g_orderkey',       'integer'),
            ('g_class',       'varchar(50)'),
            ('g_classkey',       'integer'),
            ('g_phylum',       'varchar(50)'),
            ('g_phylumkey',       'integer'),
            ('g_kingdom',       'varchar(50)'),
            ('g_kingdomkey',       'integer'),
            ('g_eventdate',       'text'),
            ('g_verbatimeventdate',       'varchar(50)'),
            ('g_startDayOfYear',       'integer'),
            ('g_endDayOfYear',       'integer'),
            ('g_year',       'integer'),
            ('g_month',       'integer'),
            ('g_day',       'integer'),
            ('g_occurrenceid',       'varchar(255)'),
            ('g_occurrenceStatus',       'varchar(50)'),
            ('g_occurrenceRemarks',       'varchar(50)'),
            ('g_Habitat',       'varchar(50)'),
            ('g_basisofrecord',       'varchar(50)'),
            ('g_preparations',       'varchar(50)'),
            ('g_sex',       'varchar(50)'),
            ('g_type',       'varchar(50)'),
            ('g_locality',       'varchar(255)'),
            ('g_verbatimlocality',       'varchar(255)'),
            ('g_decimallongitude',       'double precision'),
            ('g_decimallatitude',       'double precision'),
            ('g_geodeticdatum',       'varchar(50)'),
            ('g_higherGeography',       'varchar(255)'),
            ('g_continent',       'varchar(50)'),
            ('g_country',       'varchar(50)'),
            ('g_countryCode',       'varchar(50)'),
            ('g_stateProvince',       'varchar(50)'),
            ('g_gbifid',       'varchar(255)'),
            ('g_protocol',       'varchar(255)'),
            ('g_identifier',       'varchar(50)'),
            ('g_recordedby',       'varchar(255)'),
            ('g_identificationid',       'varchar(255)'),
            ('g_identifiers',       'text'),
            ('g_dateidentified',       'text'),
            ('g_modified',       'text'),
            ('g_institutioncode',       'varchar(50)'),
            ('g_lastinterpreted',       'text'),
            ('g_lastparsed',       'text'),
            ('g_references',       'varchar(255)'),
            ('g_relations',       'text'),
            ('g_catalognumber',       'varchar(50)'),
            ('g_occurrencedetails',       'text'),
            ('g_datasetkey',       'varchar(50)'),
            ('g_datasetname',       'varchar(255)'),
            ('g_collectioncode',       'varchar(50)'),
            ('g_rights',       'varchar(255)'),
            ('g_rightsholder',       'varchar(255)'),
            ('g_license',       'varchar(50)'),
            ('g_publishingorgkey',       'varchar(50)'),
            ('g_publishingcountry',       'varchar(50)'),
            ('g_lastcrawled',       'text'),
            ('g_specificepithet',       'varchar(50)'),
            ('g_facts',       'text'),
            ('g_issues',       'text'),
            ('g_extensions',       'text'),
            ('g_language',       'varchar(50)')]

    set_output_encoding()
    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

        if date_from and not date_to:
            eventDate = '{}'.format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = '{},{}'.format(date_from, date_to)
        else:
            grass.fatal("Invalid date range: End date has to be after start date!")
    # Set filter on basisOfRecord if requested by user
    if basisofrecord == 'ALL':
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    target_crs = grass.read_command('g.proj', flags='fj').rstrip(os.linesep)
    target = osr.SpatialReference(target_crs)
    target.ImportFromProj4(target_crs)
    if target_crs == 'XY location (unprojected)':
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split('@')) == 2:
            m = VectorTopo(mask.split('@')[0], mapset=mask.split('@')[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal('Could not find vector map <{}>'.format(mask))
        m.open('r')
        if not m.is_open():
            grass.fatal('Could not open vector map <{}>'.format(mask))

        # Use map Bbox as spatial filter if map contains <> 1 area
        if m.number_of('areas') == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = str(m.bbox()).replace('Bbox(', '').replace(' ', '').rstrip(')').split(',')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(bbox[2],
                         bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal('Import of data from outside the current region is'
                            'only supported in a WGS84 location!')
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid projection errors
            region = grass.parse_command('g.region', flags='g')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(region['e'],
                         region['n'], region['w'], region['s'])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        mapname = output
        new = Vector(mapname)
        new.open('w', tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if the taxon key is not provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match['usageKey']
            except:
                grass.error('Data request for taxon {} failed. Are you online?'.format(s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print('Matching taxon for {} is:'.format(s))
            print('{} {}'.format(species_match['scientificName'], species_match['status']))
            if 'alternatives' in list(species_match.keys()):
                print('Alternative matches might be:')
                for m in species_match['alternatives']:
                    print('{} {}'.format(m['scientificName'], m['status']))
            else:
                print('No alternatives found for the given taxon')
            continue
        if print_species_shell:
            print('match={}'.format(species_match['scientificName']))
            if 'alternatives' in list(species_match.keys()):
                alternatives = []
                for m in species_match['alternatives']:
                    alternatives.append(m['scientificName'])
                print('alternatives={}'.format(','.join(alternatives)))
            continue
        if print_species_table:
            if 'alternatives' in list(species_match.keys()):
                if len(species_match['alternatives']) == 0:
                    print('{0}|{1}|{2}|'.format(s, key, species_match['scientificName']))
                else:
                    alternatives = []
                    for m in species_match['alternatives']:
                        alternatives.append(m['scientificName'])
                    print('{0}|{1}|{2}|{3}'.format(s, key, species_match['scientificName'],
                                                    ','.join(alternatives)))
            continue
        try:
            returns_n = occurrences.search(taxonKey=key,
                                           hasGeospatialIssue=hasGeospatialIssue,
                                           hasCoordinate=hasCoordinate,
                                           institutionCode=institutionCode,
                                           basisOfRecord=basisOfRecord,
                                           recordedBy=recordedby,
                                           eventDate=eventDate,
                                           continent=continent,
                                           country=country,
                                           geometry=pol,
                                           limit=1)['count']
        except:
            grass.error('Data request for taxon {} failed. Are you online?'.format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            grass.message('Found {0} occurrences for taxon {1}...'.format(returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning('No occurrences for current search for taxon {0}...'.format(s))
            continue
        elif returns_n >= 200000:
            grass.warning('Your search for {1} returns {0} records.\n'
                          'Unfortunately, the GBIF search API is limited to 200,000 records per request.\n'
                          'The download will be incomplete. Please consider to split up your search.'.format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose('Downloading {0} occurrences for taxon {1}...'.format(returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = '{}_{}'.format(s.replace(' ', '_'), output)

            new = Vector(mapname)
            new.open('w', tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(taxonKey=key,
                                         hasGeospatialIssue=hasGeospatialIssue,
                                         hasCoordinate=hasCoordinate,
                                         institutionCode=institutionCode,
                                         basisOfRecord=basisOfRecord,
                                         recordedBy=recordedby,
                                         eventDate=eventDate,
                                         continent=continent,
                                         country=country,
                                         geometry=pol,
                                         limit=chunk_size,
                                         offset=offset)

            # Write the returned data to map and attribute table
            for res in returns['results']:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt('POINT ({} {})'.format(res['decimalLongitude'], res['decimalLatitude']))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res['decimalLongitude']
                    y = res['decimalLatitude']

                point = Point(x, y)

                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(point, cat=cat, attrs=(
                          '{}'.format(s),
                          res['key'],
                          res['taxonRank'],
                          res['taxonKey'],
                          res['taxonID'],
                          res['scientificName'],
                          res['species'],
                          res['speciesKey'],
                          res['genericName'],
                          res['genus'],
                          res['genusKey'],
                          res['family'],
                          res['familyKey'],
                          res['order'],
                          res['orderKey'],
                          res['class'],
                          res['classKey'],
                          res['phylum'],
                          res['phylumKey'],
                          res['kingdom'],
                          res['kingdomKey'],
                          '{}'.format(res['eventDate']) if res['eventDate'] else None,
                          '{}'.format(res['verbatimEventDate']) if res['verbatimEventDate'] else None,
                          res['startDayOfYear'],
                          res['endDayOfYear'],
                          res['year'],
                          res['month'],
                          res['day'],
                          res['occurrenceID'],
                          res['occurrenceStatus'],
                          res['occurrenceRemarks'],
                          res['Habitat'],
                          res['basisOfRecord'],
                          res['preparations'],
                          res['sex'],
                          res['type'],
                          res['locality'],
                          res['verbatimLocality'],
                          res['decimalLongitude'],
                          res['decimalLatitude'],
                          res['geodeticDatum'],
                          res['higherGeography'],
                          res['continent'],
                          res['country'],
                          res['countryCode'],
                          res['stateProvince'],
                          res['gbifID'],
                          res['protocol'],
                          res['identifier'],
                          res['recordedBy'],
                          res['identificationID'],
                          ','.join(res['identifiers']),
                          '{}'.format(res['dateIdentified']) if res['dateIdentified'] else None,
                          '{}'.format(res['modified']) if res['modified'] else None,
                          res['institutionCode'],
                          '{}'.format(res['lastInterpreted']) if res['lastInterpreted'] else None,
                          '{}'.format(res['lastParsed']) if res['lastParsed'] else None,
                          res['references'],
                          ','.join(res['relations']),
                          res['catalogNumber'],
                          '{}'.format(res['occurrenceDetails']) if res['occurrenceDetails'] else None,
                          res['datasetKey'],
                          res['datasetName'],
                          res['collectionCode'],
                          res['rights'],
                          res['rightsHolder'],
                          res['license'],
                          res['publishingOrgKey'],
                          res['publishingCountry'],
                          '{}'.format(res['lastCrawled']) if res['lastCrawled'] else None,
                          res['specificEpithet'],
                          ','.join(res['facts']),
                          ','.join(res['issues']),
                          ','.join(res['extensions']),
                          res['language'],))


        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command('v.build', map=mapname, option='build')

    # Close the output map if not a map for each species is requested
    if not species_maps and not print_species and not print_species_shell and not print_occ_number and not print_species_table:
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command('v.build', map=mapname, option='build')
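
For reference, the chunk/offset bookkeeping used above, in isolation (returns_n is a hypothetical count):

import math

chunk_size = 300  # GBIF's maximum page size
returns_n = 1234  # hypothetical occurrence count
chunks = int(math.ceil(returns_n / float(chunk_size)))
for c in range(chunks):
    offset = c * chunk_size
    if offset + chunk_size >= 200000:
        chunk_size = 200000 - offset  # respect the 200,000-record API limit
    print('chunk %s: offset=%s, limit=%s' % (c, offset, chunk_size))
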
Example #16
def test_name_backbone():
    "species.name_backbone - basic test"
    res = species.name_backbone(name='Helianthus annuus')
    assert dict == res.__class__
    assert 22 == len(res)
    assert 'Helianthus annuus' == res['species']
Example #17
def main():
    from dateutil.parser import parse

    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(
            _("Cannot import pygbif (https://github.com/sckott/pygbif)"
              " library."
              " Please install it (pip install pygbif)"
              " or ensure that it is on path"
              " (use PYTHONPATH variable)."))

    # Parse input options
    output = options["output"]
    mask = options["mask"]
    species_maps = flags["i"]
    no_region_limit = flags["r"]
    no_topo = flags["b"]
    print_species = flags["p"]
    print_species_table = flags["t"]
    print_species_shell = flags["g"]
    print_occ_number = flags["o"]
    allow_no_geom = flags["n"]
    hasGeoIssue = flags["s"]
    taxa_list = options["taxa"].split(",")
    institutionCode = options["institutioncode"]
    basisofrecord = options["basisofrecord"]
    recordedby = options["recordedby"].split(",")
    date_from = options["date_from"]
    date_to = options["date_to"]
    country = options["country"]
    continent = options["continent"]
    rank = options["rank"]

    # Define static variables
    # Initialize cat
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj string
    latlon_crs = [
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000 +type=crs",
    ]
    # List of attributes available in Darwin Core.
    # Not all attributes are returned in each request, so the presence of
    # DWC keys in the dictionary returned by pygbif is checked against this
    # list to avoid KeyErrors.
    # The number of keys in this list has to be equal to the number of
    # columns in the attribute table and the attributes written for each
    # occurrence.
    dwc_keys = [
        "key",
        "taxonRank",
        "taxonKey",
        "taxonID",
        "scientificName",
        "species",
        "speciesKey",
        "genericName",
        "genus",
        "genusKey",
        "family",
        "familyKey",
        "order",
        "orderKey",
        "class",
        "classKey",
        "phylum",
        "phylumKey",
        "kingdom",
        "kingdomKey",
        "eventDate",
        "verbatimEventDate",
        "startDayOfYear",
        "endDayOfYear",
        "year",
        "month",
        "day",
        "occurrenceID",
        "occurrenceStatus",
        "occurrenceRemarks",
        "Habitat",
        "basisOfRecord",
        "preparations",
        "sex",
        "type",
        "locality",
        "verbatimLocality",
        "decimalLongitude",
        "decimalLatitude",
        "coordinateUncertaintyInMeters",
        "geodeticDatum",
        "higerGeography",
        "continent",
        "country",
        "countryCode",
        "stateProvince",
        "gbifID",
        "protocol",
        "identifier",
        "recordedBy",
        "identificationID",
        "identifiers",
        "dateIdentified",
        "modified",
        "institutionCode",
        "lastInterpreted",
        "lastParsed",
        "references",
        "relations",
        "catalogNumber",
        "occurrenceDetails",
        "datasetKey",
        "datasetName",
        "collectionCode",
        "rights",
        "rightsHolder",
        "license",
        "publishingOrgKey",
        "publishingCountry",
        "lastCrawled",
        "specificEpithet",
        "facts",
        "issues",
        "extensions",
        "language",
    ]
    # Define columns for attribute table
    cols = [
        ("cat", "INTEGER PRIMARY KEY"),
        ("g_search", "varchar(100)"),
        ("g_key", "integer"),
        ("g_taxonrank", "varchar(50)"),
        ("g_taxonkey", "integer"),
        ("g_taxonid", "varchar(50)"),
        ("g_scientificname", "varchar(255)"),
        ("g_species", "varchar(255)"),
        ("g_specieskey", "integer"),
        ("g_genericname", "varchar(255)"),
        ("g_genus", "varchar(50)"),
        ("g_genuskey", "integer"),
        ("g_family", "varchar(50)"),
        ("g_familykey", "integer"),
        ("g_order", "varchar(50)"),
        ("g_orderkey", "integer"),
        ("g_class", "varchar(50)"),
        ("g_classkey", "integer"),
        ("g_phylum", "varchar(50)"),
        ("g_phylumkey", "integer"),
        ("g_kingdom", "varchar(50)"),
        ("g_kingdomkey", "integer"),
        ("g_eventdate", "text"),
        ("g_verbatimeventdate", "varchar(50)"),
        ("g_startDayOfYear", "integer"),
        ("g_endDayOfYear", "integer"),
        ("g_year", "integer"),
        ("g_month", "integer"),
        ("g_day", "integer"),
        ("g_occurrenceid", "varchar(255)"),
        ("g_occurrenceStatus", "varchar(50)"),
        ("g_occurrenceRemarks", "varchar(50)"),
        ("g_Habitat", "varchar(50)"),
        ("g_basisofrecord", "varchar(50)"),
        ("g_preparations", "varchar(50)"),
        ("g_sex", "varchar(50)"),
        ("g_type", "varchar(50)"),
        ("g_locality", "varchar(255)"),
        ("g_verbatimlocality", "varchar(255)"),
        ("g_decimallongitude", "double precision"),
        ("g_decimallatitude", "double precision"),
        ("g_coordinateUncertaintyInMeters", "double precision"),
        ("g_geodeticdatum", "varchar(50)"),
        ("g_higerGeography", "varchar(255)"),
        ("g_continent", "varchar(50)"),
        ("g_country", "varchar(50)"),
        ("g_countryCode", "varchar(50)"),
        ("g_stateProvince", "varchar(50)"),
        ("g_gbifid", "varchar(255)"),
        ("g_protocol", "varchar(255)"),
        ("g_identifier", "varchar(50)"),
        ("g_recordedby", "varchar(255)"),
        ("g_identificationid", "varchar(255)"),
        ("g_identifiers", "text"),
        ("g_dateidentified", "text"),
        ("g_modified", "text"),
        ("g_institutioncode", "varchar(50)"),
        ("g_lastinterpreted", "text"),
        ("g_lastparsed", "text"),
        ("g_references", "varchar(255)"),
        ("g_relations", "text"),
        ("g_catalognumber", "varchar(50)"),
        ("g_occurrencedetails", "text"),
        ("g_datasetkey", "varchar(50)"),
        ("g_datasetname", "varchar(255)"),
        ("g_collectioncode", "varchar(50)"),
        ("g_rights", "varchar(255)"),
        ("g_rightsholder", "varchar(255)"),
        ("g_license", "varchar(50)"),
        ("g_publishingorgkey", "varchar(50)"),
        ("g_publishingcountry", "varchar(50)"),
        ("g_lastcrawled", "text"),
        ("g_specificepithet", "varchar(50)"),
        ("g_facts", "text"),
        ("g_issues", "text"),
        ("g_extensions", "text"),
        ("g_language", "varchar(50)"),
    ]

    # maybe no longer required in Python3
    set_output_encoding()
    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

        if date_from and not date_to:
            eventDate = "{}".format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid end date provided")
        if not date_from:
            grass.fatal("Invalid date range: Start date is required"
                        " when an end date is given!")
        # Check if date_to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = "{},{}".format(date_from, date_to)
        else:
            grass.fatal(
                "Invalid date range: End date has to be after start date!")
    # Set filter on basisOfRecord if requested by user
    if basisofrecord == "ALL":
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    proj_info = grass.parse_command("g.proj", flags="g")
    target_crs = grass.read_command("g.proj", flags="fj").rstrip()
    target = osr.SpatialReference()

    # Prefer EPSG CRS definitions
    if proj_info["epsg"]:
        target.ImportFromEPSG(int(proj_info["epsg"]))
    else:
        target.ImportFromProj4(target_crs)

    # GDAL >= 3 swaps x and y axis, see: github.com/OSGeo/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        target.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs == "XY location (unprojected)":
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)
    # GDAL >= 3 swaps x and y axis, see: github.com/OSGeo/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        source.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split("@")) == 2:
            m = VectorTopo(mask.split("@")[0], mapset=mask.split("@")[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal("Could not find vector map <{}>".format(mask))
        m.open("r")
        if not m.is_open():
            grass.fatal("Could not open vector map <{}>".format(mask))

        # Use the map bbox as spatial filter if the map does not contain exactly one area
        if m.number_of("areas") == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = (str(m.bbox()).replace("Bbox(", "").replace(
                " ", "").rstrip(")").split(","))
            region_pol = "POLYGON (({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))".format(
                bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal("Import of data from outside the current region is"
                            "only supported in a WGS84 location!")
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid projection errors
            region = grass.parse_command("g.region", flags="g")
            region_pol = "POLYGON (({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format(
                region["e"], region["n"], region["w"], region["s"])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create the output map if maps for each species are not requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        mapname = output
        new = Vector(mapname)
        new.open("w", tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if it is not already provided as input
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s,
                                                      rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match["usageKey"]
            except:
                grass.error(
                    "Data request for taxon {} failed. Are you online?".format(
                        s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print("Matching taxon for {} is:".format(s))
            print("{} {}".format(species_match["scientificName"],
                                 species_match["status"]))
            if "alternatives" in list(species_match.keys()):
                print("Alternative matches might be: {}".format(s))
                for m in species_match["alternatives"]:
                    print("{} {}".format(m["scientificName"], m["status"]))
            else:
                print("No alternatives found for the given taxon")
            continue
        if print_species_shell:
            print("match={}".format(species_match["scientificName"]))
            if "alternatives" in list(species_match.keys()):
                alternatives = []
                for m in species_match["alternatives"]:
                    alternatives.append(m["scientificName"])
                print("alternatives={}".format(",".join(alternatives)))
            continue
        if print_species_table:
            if "alternatives" in list(species_match.keys()):
                if len(species_match["alternatives"]) == 0:
                    print("{0}|{1}|{2}|".format(
                        s, key, species_match["scientificName"]))
                else:
                    alternatives = []
                    for m in species_match["alternatives"]:
                        alternatives.append(m["scientificName"])
                    print("{0}|{1}|{2}|{3}".format(
                        s,
                        key,
                        species_match["scientificName"],
                        ",".join(alternatives),
                    ))
            continue
        try:
            returns_n = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=1,
            )["count"]
        except:
            grass.error(
                "Data request for taxon {} faild. Are you online?".format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            print("Found {0} occurrences for taxon {1}...".format(
                returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning(
                "No occurrences for current search for taxon {0}...".format(s))
            continue
        elif returns_n >= 200000:
            grass.warning(
                "Your search for {1} returns {0} records.\n"
                "Unfortunately, the GBIF search API is limited to 200,000 records per request.\n"
                "The download will be incomplete. Please consider to split up your search."
                .format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose("Downloading {0} occurrences for taxon {1}...".format(
            returns_n, s))

        # Create a map for each species if requested, using the output name as suffix
        if species_maps:
            mapname = "{}_{}".format(s.replace(" ", "_"), output)

            new = Vector(mapname)
            new.open("w", tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust the request limit to the hard limit of 200,000 records
            # in the GBIF API if necessary (without mutating chunk_size,
            # which is reused for the offsets and for later taxa)
            limit = chunk_size
            if offset + chunk_size >= 200000:
                limit = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=limit,
                offset=offset,
            )

            # Write the returned data to map and attribute table
            for res in returns["results"]:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
                        res["decimalLongitude"], res["decimalLatitude"]))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res["decimalLongitude"]
                    y = res["decimalLatitude"]

                point = Point(x, y)

                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(
                    point,
                    cat=cat,
                    attrs=(
                        "{}".format(s),
                        res["key"],
                        res["taxonRank"],
                        res["taxonKey"],
                        res["taxonID"],
                        res["scientificName"],
                        res["species"],
                        res["speciesKey"],
                        res["genericName"],
                        res["genus"],
                        res["genusKey"],
                        res["family"],
                        res["familyKey"],
                        res["order"],
                        res["orderKey"],
                        res["class"],
                        res["classKey"],
                        res["phylum"],
                        res["phylumKey"],
                        res["kingdom"],
                        res["kingdomKey"],
                        "{}".format(res["eventDate"])
                        if res["eventDate"] else None,
                        "{}".format(res["verbatimEventDate"])
                        if res["verbatimEventDate"] else None,
                        res["startDayOfYear"],
                        res["endDayOfYear"],
                        res["year"],
                        res["month"],
                        res["day"],
                        res["occurrenceID"],
                        res["occurrenceStatus"],
                        res["occurrenceRemarks"],
                        res["Habitat"],
                        res["basisOfRecord"],
                        res["preparations"],
                        res["sex"],
                        res["type"],
                        res["locality"],
                        res["verbatimLocality"],
                        res["decimalLongitude"],
                        res["decimalLatitude"],
                        res["coordinateUncertaintyInMeters"],
                        res["geodeticDatum"],
                        res["higerGeography"],
                        res["continent"],
                        res["country"],
                        res["countryCode"],
                        res["stateProvince"],
                        res["gbifID"],
                        res["protocol"],
                        res["identifier"],
                        res["recordedBy"],
                        res["identificationID"],
                        ",".join(res["identifiers"]),
                        "{}".format(res["dateIdentified"])
                        if res["dateIdentified"] else None,
                        "{}".format(res["modified"])
                        if res["modified"] else None,
                        res["institutionCode"],
                        "{}".format(res["lastInterpreted"])
                        if res["lastInterpreted"] else None,
                        "{}".format(res["lastParsed"])
                        if res["lastParsed"] else None,
                        res["references"],
                        ",".join(res["relations"]),
                        res["catalogNumber"],
                        "{}".format(res["occurrenceDetails"])
                        if res["occurrenceDetails"] else None,
                        res["datasetKey"],
                        res["datasetName"],
                        res["collectionCode"],
                        res["rights"],
                        res["rightsHolder"],
                        res["license"],
                        res["publishingOrgKey"],
                        res["publishingCountry"],
                        "{}".format(res["lastCrawled"])
                        if res["lastCrawled"] else None,
                        res["specificEpithet"],
                        ",".join(res["facts"]),
                        ",".join(res["issues"]),
                        ",".join(res["extensions"]),
                        res["language"],
                    ),
                )

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command("v.build", map=mapname, option="build")

            # Write history to map
            grass.vector_history(mapname)

    # Close the output map if a map for each species was not requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command("v.build", map=mapname, option="build")

        # Write history to map
        grass.vector_history(mapname)
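
The download loop above combines three constraints: pygbif returns at most chunk_size records per call, paging is driven by offset, and the GBIF search API stops at 200,000 records per search. A minimal standalone sketch of that pattern (the function name and defaults are illustrative, not part of the module):

from pygbif import occurrences

def fetch_all(taxon_key, chunk_size=300, hard_limit=200000):
    """Page through occurrence records until endOfRecords or the API cap."""
    records = []
    offset = 0
    while offset < hard_limit:
        # Shrink the last request so offset + limit never exceeds the cap.
        limit = min(chunk_size, hard_limit - offset)
        page = occurrences.search(taxonKey=taxon_key, hasCoordinate=True,
                                  limit=limit, offset=offset)
        records.extend(page["results"])
        if page["endOfRecords"]:
            break
        offset += limit
    return records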
Example #20
0
def gbif_serach(taxon_name):
    """
  API to GBIF database.
  :param taxon_name: Scientific name of tree species (e.g 'Fagus sylvatica')
  """

    try:
        from pygbif import occurrences as occ
        from pygbif import species

        polys = [
            "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
            "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
            "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
            "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
            "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
            "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
            "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
            "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
            "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
            "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
            "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
            "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
        ]

        nm = species.name_backbone(taxon_name)['usageKey']
        logger.info('Taxon Key found: %s' % nm)
    except Exception as e:
        logger.error('Taxon key lookup failed: %s' % (e))

    try:
        results = []
        for i, p in enumerate(polys):
            res = []
            x = occ.search(taxonKey=nm, geometry=p)
            res.append(x['results'])
            while not x['endOfRecords']:
                x = occ.search(taxonKey=nm,
                               geometry=p,
                               offset=sum([len(x) for x in res]))
                res.append(x['results'])
            results.append([w for z in res for w in z])
            logger.info('Polygon %s/%s done' % (i + 1, len(polys)))
        logger.info('***** GBIF data fetching done! ***** ')
    except Exception as e:
        logger.error('Coordinate fetching failed: %s' % (e))

    try:
        allres = [w for z in results for w in z]
        coords = [{k: v
                   for k, v in w.items() if k.startswith('decimal')}
                  for w in allres]

        from numpy import empty
        latlon = empty([len(coords), 2], dtype=float, order='C')

        for i, coord in enumerate(coords):
            latlon[i][0] = coord['decimalLatitude']
            latlon[i][1] = coord['decimalLongitude']

        logger.info('read in PA coordinates for %s rows ' % len(latlon[:, 0]))
    except Exception as e:
        logger.error('failed to search GBIF data: %s' % (e))
    return latlon
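
A hedged usage sketch for the function above (the taxon name is illustrative; on success the function returns an (n, 2) NumPy array of coordinates):

latlon = gbif_serach('Fagus sylvatica')
# One row per occurrence: column 0 is latitude, column 1 is longitude.
print(latlon.shape)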
Example #21
0
def test_name_backbone():
    "species.name_backbone - basic test"
    res = species.name_backbone(name="Helianthus annuus")
    assert dict == res.__class__
    assert 22 == len(res)
    assert "Helianthus annuus" == res["species"]
Example #22
0
def test_name_backbone_multiple_matches():
    "species.name_backbone - multiple matches"
    res = species.name_backbone(name="Aso")
    assert dict == res.__class__
    assert 4 == len(res)
    assert "Multiple equal matches for Aso" == res["note"]
Example #23
0
def test_name_backbone():
    "species.name_backbone - basic test"
    res = species.name_backbone(name="Helianthus annuus")
    assert dict == res.__class__
    assert 22 == len(res)
    assert "Helianthus annuus" == res["species"]
Example #24
0
def test_name_backbone_multiple_matches():
    "species.name_backbone - multiple matches"
    res = species.name_backbone(name="Aso")
    assert dict == res.__class__
    assert 4 == len(res)
    assert "No match because of too little confidence" == res["note"]
Example #25
0
def test_name_backbone_multiple_matches():
    "species.name_backbone - multiple matches"
    res = species.name_backbone(name='Aso')
    assert dict == res.__class__
    assert 4 == len(res)
    assert 'No match because of too little confidence' == res['note']
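
The differing 'note' assertions above reflect how GBIF reports ambiguous matches. A small sketch of requesting the 'alternatives' list that the GRASS module in Example #19 inspects (assumes a pygbif version that supports the verbose flag):

from pygbif import species

res = species.name_backbone(name='Aso', verbose=True)
for alt in res.get('alternatives', []):
    print(alt['scientificName'], alt['status'])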
Example #26
0
from pygbif import occurrences as occ
from pygbif import species

nm = species.name_backbone('Fagus sylvatica')['usageKey']

polys = [
    "POLYGON ((-13.9746093699999996 66.1882478999999933, -6.4746093699999996 66.1882478999999933, -6.4746093699999996 57.4422366399999973, -13.9746093699999996 57.4422366399999973, -13.9746093699999996 66.1882478999999933))",
    "POLYGON ((-6.4746093699999996 66.1882478999999933, 8.5253906300000004 66.1882478999999933, 8.5253906300000004 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 66.1882478999999933))",
    "POLYGON ((8.5253906300000004 66.1882478999999933, 23.5253906300000004 66.1882478999999933, 23.5253906300000004 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 66.1882478999999933))",
    "POLYGON ((23.5253906300000004 66.1882478999999933, 31.7285156300000004 66.1882478999999933, 31.7285156300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 66.1882478999999933))",
    "POLYGON ((-13.9746093699999996 42.4422366399999973, -13.9746093699999996 57.4422366399999973, -6.4746093699999996 57.4422366399999973, -6.4746093699999996 42.4422366399999973, -13.9746093699999996 42.4422366399999973))",
    "POLYGON ((-6.4746093699999996 42.4422366399999973, -6.4746093699999996 57.4422366399999973, 8.5253906300000004 57.4422366399999973, 8.5253906300000004 42.4422366399999973, -6.4746093699999996 42.4422366399999973))",
    "POLYGON ((8.5253906300000004 42.4422366399999973, 8.5253906300000004 57.4422366399999973, 23.5253906300000004 57.4422366399999973, 23.5253906300000004 42.4422366399999973, 8.5253906300000004 42.4422366399999973))",
    "POLYGON ((31.7285156300000004 57.4422366399999973, 31.7285156300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 57.4422366399999973, 31.7285156300000004 57.4422366399999973))",
    "POLYGON ((-6.4746093699999996 34.9422366399999973, -13.9746093699999996 34.9422366399999973, -13.9746093699999996 42.4422366399999973, -6.4746093699999996 42.4422366399999973, -6.4746093699999996 34.9422366399999973))",
    "POLYGON ((8.5253906300000004 34.9422366399999973, -6.4746093699999996 34.9422366399999973, -6.4746093699999996 42.4422366399999973, 8.5253906300000004 42.4422366399999973, 8.5253906300000004 34.9422366399999973))",
    "POLYGON ((23.5253906300000004 34.9422366399999973, 8.5253906300000004 34.9422366399999973, 8.5253906300000004 42.4422366399999973, 23.5253906300000004 42.4422366399999973, 23.5253906300000004 34.9422366399999973))",
    "POLYGON ((31.7285156300000004 42.4422366399999973, 31.7285156300000004 34.9422366399999973, 23.5253906300000004 34.9422366399999973, 23.5253906300000004 42.4422366399999973, 31.7285156300000004 42.4422366399999973))"
]

results = []
for i in polys:
    res = []
    x = occ.search(taxonKey=nm, geometry=i)
    res.append(x['results'])
    while not x['endOfRecords']:
        x = occ.search(taxonKey=nm,
                       geometry=i,
                       offset=sum([len(x) for x in res]))
        res.append(x['results'])
    results.append([w for z in res for w in z])
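
The twelve hand-coded WKT rectangles above tile the search area so that each occ.search() query stays well below GBIF's per-search record cap. A sketch of how such a rectangular tiling could be generated instead of hard-coded (the helper is illustrative, not part of the example):

def grid_wkt(xmin, ymin, xmax, ymax, nx, ny):
    """Split a bounding box into nx * ny rectangular WKT polygons."""
    dx = (xmax - xmin) / float(nx)
    dy = (ymax - ymin) / float(ny)
    tiles = []
    for i in range(nx):
        for j in range(ny):
            x0, y0 = xmin + i * dx, ymin + j * dy
            x1, y1 = x0 + dx, y0 + dy
            tiles.append(
                "POLYGON (({0} {1}, {2} {1}, {2} {3}, {0} {3}, {0} {1}))"
                .format(x0, y0, x1, y1))
    return tiles

# For example, a 4 x 3 tiling of roughly the same European window:
polys = grid_wkt(-13.97, 34.94, 31.73, 66.19, 4, 3)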
Example #27
0
    def get_gbif_occs(self, geometry=False, tol=0):
        """
        Query the gbif database for occurrence data.
        """

        # Create a file to store occurrence data.
        self.occfile = os.path.join(
            self.outdir, self.params['spname'].replace(" ", "_") + ".csv")

        # Get the usageKey for species of interest.
        self.key = species.name_backbone(name=self.params['spname'],
                                         rank='species')['usageKey']

        # Create latitude/longitude lists.
        self.lats = []
        self.lons = []

        # Build dicts for optional params.
        # if self.params['basis'] == True:
        basis_params = dict(basisOfRecord=[
            'HUMAN_OBSERVATION', 'LIVING_SPECIMEN', 'FOSSIL_SPECIMEN'
        ], )
        # if self.params['continent'] is not None:
        continent_params = dict(continent=self.params['continent'])
        if geometry:
            geo_orient = shapely.geometry.polygon.orient(
                self.geometry['geometry'][0],
                1.0)  # Counter-clockwise for GBIF.
            geometry_bounds = dict(geometry=str(geo_orient.simplify(tol)))
        else:
            geometry_bounds = dict(place='holder')
        search_bounds = dict(
            decimalLatitude=','.join(
                [str(self.params['ymin']),
                 str(self.params['ymax'])]),
            decimalLongitude=','.join(
                [str(self.params['xmin']),
                 str(self.params['xmax'])]),
        )

        # Run a while-loop to go through all observations.  By default, tries to narrow to native range.
        # Don't pass lat/long bounds if none were entered.
        curr_offset = 0
        end_records = False
        while not end_records:
            occ_records = occ.search(
                taxonKey=self.key,
                hasCoordinate=True,
                # hasGeospatialIssue = False,
                **{
                    k: v
                    for k, v in basis_params.items()
                    if self.params['basis']
                },
                **{
                    k: v
                    for k, v in continent_params.items()
                    if self.params['continent'] is not None
                },
                **{
                    k: v
                    for k, v in geometry_bounds.items() if geometry
                },
                **{k: v
                   for k, v in search_bounds.items() if 'None' not in v},
                offset=curr_offset)
            end_records = occ_records['endOfRecords']
            curr_offset += occ_records['limit']

            # Add latitude/longitude results to lists.
            self.lats.extend(
                [i['decimalLatitude'] for i in occ_records['results']])
            self.lons.extend(
                [i['decimalLongitude'] for i in occ_records['results']])

            # Print a dot on each cycle to show progress.
            print(".", end="")

            # When end of data is reached: build pandas dataframe from lists and remove duplicate data points.
            if occ_records['endOfRecords']:
                df = pd.DataFrame({
                    'Latitude': self.lats,
                    'Longitude': self.lons
                })
                df = df.drop_duplicates().reset_index()
                df = df.drop('index', axis=1)

                # Filter outliers.
                df = df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]

                # Reform the lists by subsetting the dataframe.
                self.lats = list(df['Latitude'])
                self.lons = list(df['Longitude'])

                # Print final number of records.
                print(f' Found {len(self.lats)} records.')

        # Build array to write to CSV file.  np.vstack layers arrays vertically, where each layer is species-lat-lon.
        # np.repeat copies the species names as many times as there are entries.  It also combines with zip() to put
        # a newline char at the end of each layer.
        csvarr = np.vstack([
            np.repeat(self.params['spname'].replace(" ", "_"), len(self.lats)),
            self.lats,
            [
                "{}{}".format(a_, b_)
                for a_, b_ in zip(self.lons, np.repeat('\n', len(self.lats)))
            ]
        ]).T

        # Write array to CSV file.
        with open(self.occfile, 'w') as f:
            f.write('Species,Latitude,Longitude\n')
            for line in csvarr:
                f.write(",".join(line))

        # Transform lists to arrays for downstream application.
        self.lats = np.array(self.lats)
        self.lons = np.array(self.lons)
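
The z-score filter near the end of get_gbif_occs() drops rows more than three standard deviations from the mean in either coordinate. A standalone sketch of that step with made-up data (assumes numpy, pandas and scipy):

import numpy as np
import pandas as pd
from scipy import stats

# 20 plausible points near (48 N, 11 E) plus one far-away outlier.
lats = [48.0 + 0.01 * i for i in range(20)] + [75.0]
lons = [11.0 + 0.01 * i for i in range(20)] + [-120.0]
df = pd.DataFrame({'Latitude': lats, 'Longitude': lons})
df = df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]
print(len(df))  # 20: the outlier row is removed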
Example #28
0
from pygbif import species, occurrences
from numpy import nan, empty
TName = "Fagus sylvatica"
key = species.name_backbone(name=TName, rank="species")["usageKey"]
n = occurrences.count(taxonKey=key, isGeoreferenced=True)

# Cap the request at GBIF's 200,000-record search limit
# (and avoid shadowing the max() builtin)
nmax = min(n, 200000)
results = occurrences.search(taxonKey=key, limit=nmax)

print('(', key, ')', '-', format(n, ','), " occurrence(s)")

# lonslats = []
latlon = empty([nmax, 2], dtype=float, order='C')

for i, x in enumerate(results["results"]):
    #try:
    #if x['continent'].find('_') != -1:
    #Continent = ' '.join(x['continent'].split('_')).title()
    #else:
    #Continent = x['continent'].capitalize()
    #except:
    #Continent = ""
    #try:
    #Country = x['country']
    #except:
    #Country = ""

    #try:
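
The example is cut off above. A hedged sketch of how the preallocated latlon array would typically be filled from the loop, mirroring Example #20 (the .get() guards are an assumption for records lacking coordinates):

for i, x in enumerate(results['results']):
    latlon[i][0] = x.get('decimalLatitude', nan)
    latlon[i][1] = x.get('decimalLongitude', nan)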
Example #29
0
"""
Description: Species-concepts change over time, sometimes with a spatial
component (e.g., changes in range delineation of closely related species or
subspecies).  Retrieval of data for the wrong species-concept would introduce
error.  Therefore, the first step is to sort out species concepts of different
datasets to identify concepts that can be investigated.

For this project/effort, individual species-concepts will be identified,
crosswalked to concepts from various datasets, and stored in a table within
a database.

For now, a single species has been manually entered into species-concepts
for development.
"""
from pygbif import species
key = species.name_backbone(name='Ammodramus maritimus macgillivraii',
                            rank='species')['usageKey']
print(key)
Example #30
0
from pygbif import species

def get_taxonomy(name):
    res = species.name_backbone(name)
    return res
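
A brief hypothetical call of the wrapper above; name_backbone() fuzzy-matches the name against the GBIF backbone taxonomy and returns a dict of match metadata:

tax = get_taxonomy('Fagus sylvatica')
print(tax.get('usageKey'), tax.get('scientificName'), tax.get('matchType'))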