def main():
    """Download GBIF occurrence records for the requested taxa and import
    them as GRASS vector point map(s) with full Darwin Core attributes.

    Reads user input from the module-level ``options`` / ``flags`` dicts
    (GRASS parser). Depending on flags it either only prints taxon/occurrence
    information, or writes one output map (or one map per species with -i).
    Calls grass.fatal() on unrecoverable errors (which exits).
    """
    from dateutil.parser import parse

    # pygbif is an optional third-party dependency; fail with a helpful
    # message instead of a traceback when it is missing.
    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(
            _("Cannot import pygbif (https://github.com/sckott/pygbif)"
              " library."
              " Please install it (pip install pygbif)"
              " or ensure that it is on path"
              " (use PYTHONPATH variable)."))

    # Parse input options
    output = options["output"]
    mask = options["mask"]
    species_maps = flags["i"]
    no_region_limit = flags["r"]
    no_topo = flags["b"]
    print_species = flags["p"]
    print_species_table = flags["t"]
    print_species_shell = flags["g"]
    print_occ_number = flags["o"]
    allow_no_geom = flags["n"]
    hasGeoIssue = flags["s"]
    taxa_list = options["taxa"].split(",")
    institutionCode = options["institutioncode"]
    basisofrecord = options["basisofrecord"]
    recordedby = options["recordedby"].split(",")
    date_from = options["date_from"]
    date_to = options["date_to"]
    country = options["country"]
    continent = options["continent"]
    rank = options["rank"]

    # Define static variable
    # Initialize cat (feature category counter for the attribute table)
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj strings: any location whose proj4 string matches one of
    # these is treated as WGS84 lat/lon and data is not reprojected.
    latlon_crs = [
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0",
        "+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000 +type=crs",
    ]
    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    # NOTE(review): "higerGeography" and "Habitat" look like typos for the
    # DWC terms "higherGeography"/"habitat" -- confirm against GBIF response.
    dwc_keys = [
        "key", "taxonRank", "taxonKey", "taxonID", "scientificName",
        "species", "speciesKey", "genericName", "genus", "genusKey",
        "family", "familyKey", "order", "orderKey", "class", "classKey",
        "phylum", "phylumKey", "kingdom", "kingdomKey", "eventDate",
        "verbatimEventDate", "startDayOfYear", "endDayOfYear", "year",
        "month", "day", "occurrenceID", "occurrenceStatus",
        "occurrenceRemarks", "Habitat", "basisOfRecord", "preparations",
        "sex", "type", "locality", "verbatimLocality", "decimalLongitude",
        "decimalLatitude", "coordinateUncertaintyInMeters", "geodeticDatum",
        "higerGeography", "continent", "country", "countryCode",
        "stateProvince", "gbifID", "protocol", "identifier", "recordedBy",
        "identificationID", "identifiers", "dateIdentified", "modified",
        "institutionCode", "lastInterpreted", "lastParsed", "references",
        "relations", "catalogNumber", "occurrenceDetails", "datasetKey",
        "datasetName", "collectionCode", "rights", "rightsHolder",
        "license", "publishingOrgKey", "publishingCountry", "lastCrawled",
        "specificEpithet", "facts", "issues", "extensions", "language",
    ]
    # Define columns for attribute table (cat + g_search + one column per
    # Darwin Core key above, in the same order as the attrs tuple below)
    cols = [
        ("cat", "INTEGER PRIMARY KEY"),
        ("g_search", "varchar(100)"),
        ("g_key", "integer"),
        ("g_taxonrank", "varchar(50)"),
        ("g_taxonkey", "integer"),
        ("g_taxonid", "varchar(50)"),
        ("g_scientificname", "varchar(255)"),
        ("g_species", "varchar(255)"),
        ("g_specieskey", "integer"),
        ("g_genericname", "varchar(255)"),
        ("g_genus", "varchar(50)"),
        ("g_genuskey", "integer"),
        ("g_family", "varchar(50)"),
        ("g_familykey", "integer"),
        ("g_order", "varchar(50)"),
        ("g_orderkey", "integer"),
        ("g_class", "varchar(50)"),
        ("g_classkey", "integer"),
        ("g_phylum", "varchar(50)"),
        ("g_phylumkey", "integer"),
        ("g_kingdom", "varchar(50)"),
        ("g_kingdomkey", "integer"),
        ("g_eventdate", "text"),
        ("g_verbatimeventdate", "varchar(50)"),
        ("g_startDayOfYear", "integer"),
        ("g_endDayOfYear", "integer"),
        ("g_year", "integer"),
        ("g_month", "integer"),
        ("g_day", "integer"),
        ("g_occurrenceid", "varchar(255)"),
        ("g_occurrenceStatus", "varchar(50)"),
        ("g_occurrenceRemarks", "varchar(50)"),
        ("g_Habitat", "varchar(50)"),
        ("g_basisofrecord", "varchar(50)"),
        ("g_preparations", "varchar(50)"),
        ("g_sex", "varchar(50)"),
        ("g_type", "varchar(50)"),
        ("g_locality", "varchar(255)"),
        ("g_verbatimlocality", "varchar(255)"),
        ("g_decimallongitude", "double precision"),
        ("g_decimallatitude", "double precision"),
        ("g_coordinateUncertaintyInMeters", "double precision"),
        ("g_geodeticdatum", "varchar(50)"),
        ("g_higerGeography", "varchar(255)"),
        ("g_continent", "varchar(50)"),
        ("g_country", "varchar(50)"),
        ("g_countryCode", "varchar(50)"),
        ("g_stateProvince", "varchar(50)"),
        ("g_gbifid", "varchar(255)"),
        ("g_protocol", "varchar(255)"),
        ("g_identifier", "varchar(50)"),
        ("g_recordedby", "varchar(255)"),
        ("g_identificationid", "varchar(255)"),
        ("g_identifiers", "text"),
        ("g_dateidentified", "text"),
        ("g_modified", "text"),
        ("g_institutioncode", "varchar(50)"),
        ("g_lastinterpreted", "text"),
        ("g_lastparsed", "text"),
        ("g_references", "varchar(255)"),
        ("g_relations", "text"),
        ("g_catalognumber", "varchar(50)"),
        ("g_occurrencedetails", "text"),
        ("g_datasetkey", "varchar(50)"),
        ("g_datasetname", "varchar(255)"),
        ("g_collectioncode", "varchar(50)"),
        ("g_rights", "varchar(255)"),
        ("g_rightsholder", "varchar(255)"),
        ("g_license", "varchar(50)"),
        ("g_publishingorgkey", "varchar(50)"),
        ("g_publishingcountry", "varchar(50)"),
        ("g_lastcrawled", "text"),
        ("g_specificepithet", "varchar(50)"),
        ("g_facts", "text"),
        ("g_issues", "text"),
        ("g_extensions", "text"),
        ("g_language", "varchar(50)"),
    ]

    # maybe no longer required in Python3 (helper defined elsewhere in file)
    set_output_encoding()

    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    # NOTE(review): bare except and duplicated word in message
    # ("Invalid invalid") -- candidates for cleanup.
    if date_from:
        try:
            parse(date_from)
        except:
            grass.fatal("Invalid invalid start date provided")

    if date_from and not date_to:
        eventDate = "{}".format(date_from)

    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except:
            grass.fatal("Invalid invalid end date provided")
        # Check if date to is after date_from
        # NOTE(review): if only date_to is given, parse(date_from) is called
        # on an empty string outside any try -- likely unhandled ValueError.
        if parse(date_from) < parse(date_to):
            eventDate = "{},{}".format(date_from, date_to)
        else:
            grass.fatal(
                "Invalid date range: End date has to be after start date!")

    # Set filter on basisOfRecord if requested by user
    if basisofrecord == "ALL":
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord

    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True

    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    proj_info = grass.parse_command("g.proj", flags="g")
    target_crs = grass.read_command("g.proj", flags="fj").rstrip()
    target = osr.SpatialReference()

    # Prefer EPSG CRS definitions
    if proj_info["epsg"]:
        target.ImportFromEPSG(int(proj_info["epsg"]))
    else:
        target.ImportFromProj4(target_crs)

    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        target.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs == "XY location (unprojected)":
        grass.fatal("Sorry, XY locations are not supported!")

    # Set source projection from GBIF (always WGS84 lat/lon)
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)

    # GDAL >= 3 swaps x and y axis, see: github.com/gdal/issues/1546
    if int(gdal_version[0]) >= 3:
        source.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split("@")) == 2:
            m = VectorTopo(mask.split("@")[0], mapset=mask.split("@")[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal("Could not find vector map <{}>".format(mask))
        m.open("r")
        if not m.is_open():
            grass.fatal("Could not open vector map <{}>".format(mask))

        # Use map Bbox as spatial filter if map contains <> 1 area
        if m.number_of("areas") == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = (str(m.bbox()).replace("Bbox(", "").replace(
                " ", "").rstrip(")").split(","))
            region_pol = "POLYGON (({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))".format(
                bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                grass.fatal("Import of data from outside the current region is"
                            "only supported in a WGS84 location!")
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid pprojection ERRORS
            region = grass.parse_command("g.region", flags="g")
            region_pol = "POLYGON (({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format(
                region["e"], region["n"], region["w"], region["s"])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # Create output map if not output maps for each species are requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        mapname = output
        new = Vector(mapname)
        new.open("w", tab_name=mapname, tab_cols=cols)
        cat = 1

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if not the taxon key is provided as input
        # NOTE(review): when a numeric key is given, species_match stays
        # unbound -- the print_* branches below would raise NameError.
        try:
            key = int(s)
        except:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match["usageKey"]
            except:
                grass.error(
                    "Data request for taxon {} failed. Are you online?".format(
                        s))
                continue

        # Return matching taxon and alternatives and exit
        if print_species:
            print("Matching taxon for {} is:".format(s))
            print("{} {}".format(species_match["scientificName"],
                                 species_match["status"]))
            if "alternatives" in list(species_match.keys()):
                print("Alternative matches might be: {}".format(s))
                for m in species_match["alternatives"]:
                    print("{} {}".format(m["scientificName"], m["status"]))
            else:
                print("No alternatives found for the given taxon")
            continue
        if print_species_shell:
            print("match={}".format(species_match["scientificName"]))
            if "alternatives" in list(species_match.keys()):
                alternatives = []
                for m in species_match["alternatives"]:
                    alternatives.append(m["scientificName"])
                print("alternatives={}".format(",".join(alternatives)))
            continue
        if print_species_table:
            if "alternatives" in list(species_match.keys()):
                if len(species_match["alternatives"]) == 0:
                    print("{0}|{1}|{2}|".format(
                        s, key, species_match["scientificName"]))
                else:
                    alternatives = []
                    for m in species_match["alternatives"]:
                        alternatives.append(m["scientificName"])
                    print("{0}|{1}|{2}|{3}".format(
                        s, key, species_match["scientificName"],
                        ",".join(alternatives),
                    ))
            continue

        # First request with limit=1 only fetches the total record count
        try:
            returns_n = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=1,
            )["count"]
        except:
            grass.error(
                "Data request for taxon {} faild. Are you online?".format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            print("Found {0} occurrences for taxon {1}...".format(
                returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning(
                "No occurrences for current search for taxon {0}...".format(s))
            continue
        elif returns_n >= 200000:
            grass.warning(
                "Your search for {1} returns {0} records.\n"
                "Unfortunately, the GBIF search API is limited to 200,000 records per request.\n"
                "The download will be incomplete. Please consider to split up your search."
                .format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose("Downloading {0} occurrences for taxon {1}...".format(
            returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = "{}_{}".format(s.replace(" ", "_"), output)
            new = Vector(mapname)
            new.open("w", tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(
                taxonKey=key,
                hasGeospatialIssue=hasGeospatialIssue,
                hasCoordinate=hasCoordinate,
                institutionCode=institutionCode,
                basisOfRecord=basisOfRecord,
                recordedBy=recordedby,
                eventDate=eventDate,
                continent=continent,
                country=country,
                geometry=pol,
                limit=chunk_size,
                offset=offset,
            )

            # Write the returned data to map and attribute table
            for res in returns["results"]:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
                        res["decimalLongitude"], res["decimalLatitude"]))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res["decimalLongitude"]
                    y = res["decimalLatitude"]
                point = Point(x, y)

                # Fill in missing Darwin Core keys with None so every
                # attribute access below is safe
                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                cat = cat + 1
                new.write(
                    point,
                    cat=cat,
                    attrs=(
                        "{}".format(s),
                        res["key"],
                        res["taxonRank"],
                        res["taxonKey"],
                        res["taxonID"],
                        res["scientificName"],
                        res["species"],
                        res["speciesKey"],
                        res["genericName"],
                        res["genus"],
                        res["genusKey"],
                        res["family"],
                        res["familyKey"],
                        res["order"],
                        res["orderKey"],
                        res["class"],
                        res["classKey"],
                        res["phylum"],
                        res["phylumKey"],
                        res["kingdom"],
                        res["kingdomKey"],
                        "{}".format(res["eventDate"]) if res["eventDate"] else None,
                        "{}".format(res["verbatimEventDate"]) if res["verbatimEventDate"] else None,
                        res["startDayOfYear"],
                        res["endDayOfYear"],
                        res["year"],
                        res["month"],
                        res["day"],
                        res["occurrenceID"],
                        res["occurrenceStatus"],
                        res["occurrenceRemarks"],
                        res["Habitat"],
                        res["basisOfRecord"],
                        res["preparations"],
                        res["sex"],
                        res["type"],
                        res["locality"],
                        res["verbatimLocality"],
                        res["decimalLongitude"],
                        res["decimalLatitude"],
                        res["coordinateUncertaintyInMeters"],
                        res["geodeticDatum"],
                        res["higerGeography"],
                        res["continent"],
                        res["country"],
                        res["countryCode"],
                        res["stateProvince"],
                        res["gbifID"],
                        res["protocol"],
                        res["identifier"],
                        res["recordedBy"],
                        res["identificationID"],
                        ",".join(res["identifiers"]),
                        "{}".format(res["dateIdentified"]) if res["dateIdentified"] else None,
                        "{}".format(res["modified"]) if res["modified"] else None,
                        res["institutionCode"],
                        "{}".format(res["lastInterpreted"]) if res["lastInterpreted"] else None,
                        "{}".format(res["lastParsed"]) if res["lastParsed"] else None,
                        res["references"],
                        ",".join(res["relations"]),
                        res["catalogNumber"],
                        "{}".format(res["occurrenceDetails"]) if res["occurrenceDetails"] else None,
                        res["datasetKey"],
                        res["datasetName"],
                        res["collectionCode"],
                        res["rights"],
                        res["rightsHolder"],
                        res["license"],
                        res["publishingOrgKey"],
                        res["publishingCountry"],
                        "{}".format(res["lastCrawled"]) if res["lastCrawled"] else None,
                        res["specificEpithet"],
                        ",".join(res["facts"]),
                        ",".join(res["issues"]),
                        ",".join(res["extensions"]),
                        res["language"],
                    ),
                )

        cat = cat + 1
        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command("v.build", map=mapname, option="build")
            # Write history to map
            grass.vector_history(mapname)

    # Close the output map if not a map for each species is requested
    if (not species_maps and not print_species and not print_species_shell
            and not print_occ_number and not print_species_table):
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command("v.build", map=mapname, option="build")
        # Write history to map
        grass.vector_history(mapname)
def main(options, flags):
    """Aggregate STRDS raster values sampled at vector points over a time
    window around each input date and print or store the statistics.

    :param options: GRASS parser options dict (input, date_column, date,
        strds, output, columns, method, granularity, date_format, separator)
    :param flags: GRASS parser flags dict ('u' update, 'c' create columns,
        'a' aggregate forward in time)

    Fixes applied in review:
    * ``np.float`` -> ``float`` (``np.float`` was removed in NumPy 1.24;
      plain ``float`` is the documented replacement and what the newer
      variant of this function in this file already uses).
    * ``"... <%s> ... <%s>" % invect`` raised ``TypeError`` (two
      placeholders, one argument) instead of reaching ``gscript.fatal`` --
      now formats with ``(invect, invect)``.
    """
    # GRASS modules are only importable inside a GRASS session, hence
    # the function-scope imports.
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find('@') != -1:
        invect = invect.split('@')[0]
    incol = options["date_column"]
    indate = options["date"]
    strds = options["strds"]
    if strds.find('@') != -1:
        strds_name = strds.split('@')[0]
    else:
        strds_name = strds
    output = options["output"]
    cols = options["columns"].split(',')
    mets = options["method"].split(',')
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])

    # Decide between stdout reporting and attribute-table update
    stdout = False
    if output != '-' and flags['u']:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != '-' and flags['c']:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == '-' and (flags['u'] or flags['c']):
        output = invect
        gscript.warning(_("Attribute table of vector {name} will be updated"
                          "...").format(name=invect))
    else:
        stdout = True

    # -c: create one result column per aggregation method
    if flags['c']:
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module("v.db.addcolumn", map=invect, columns="{col} "
                             "double precision".format(col=colname))
            except CalledModuleError:
                gscript.fatal(_("Not possible to create column "
                                "{col}".format(col=colname)))

    if output != '-' and len(cols) != len(mets):
        gscript.fatal(_("'columns' and 'method' options must have the same "
                        "number of elements"))

    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

    # Convert the requested granularity into a time window (timedelta for
    # absolute time, plain integer for relative time)
    if sp.get_temporal_type() == 'absolute':
        delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
        if tgis.gran_singular_unit(gran) in ['year', 'month']:
            delta = int(tgis.gran_to_gran(gran, '1 day', True))
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'day':
            delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'hour':
            td = timedelta(hours=delta)
        elif tgis.gran_singular_unit(gran) == 'minute':
            td = timedelta(minutes=delta)
        elif tgis.gran_singular_unit(gran) == 'second':
            td = timedelta(seconds=delta)
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(_("Input granularity is smaller or equal to the {iv}"
                            " STRDS granularity".format(iv=strds)))
        td = int(gran)

    # Collect the dates to process: from a vector column or a single value
    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))
    elif incol:
        try:
            dates = pymod.Module("db.select", flags='c', stdout_=PI,
                                 stderr_=PI, sql="SELECT DISTINCT {dc} from "
                                 "{vmap} order by {dc}".format(vmap=invect,
                                                               dc=incol))
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        mydates = [indate]

    # Ensure the vector map has an attribute table
    pymap = VectorTopo(invect)
    pymap.open('r')
    if len(pymap.dblinks) == 0:
        try:
            pymap.close()
            pymod.Module("v.db.addtable", map=invect)
        except CalledModuleError:
            dbif.close()
            # Fixed: single argument for two %s placeholders raised TypeError
            gscript.fatal(_("Unable to add table <%s> to vector map "
                            "<%s>" % (invect, invect)))
    if pymap.is_open():
        pymap.close()

    qfeat = pymod.Module("v.category", stdout_=PI, stderr_=PI,
                         input=invect, option='print')
    myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ''
    for data in mydates:
        if sp.get_temporal_type() == 'absolute':
            fdata = datetime.strptime(data, dateformat)
        else:
            fdata = int(data)
        # -a aggregates forward from the date, otherwise backward
        if flags['a']:
            sdata = fdata + td
            mwhere = "start_time >= '{inn}' and end_time < " \
                     "'{out}'".format(inn=fdata, out=sdata)
        else:
            sdata = fdata - td
            mwhere = "start_time >= '{inn}' and end_time < " \
                     "'{out}'".format(inn=sdata, out=fdata)
        lines = None
        try:
            r_what = pymod.Module("t.rast.what", points=invect, strds=strds,
                                  layout='timerow', separator=separator,
                                  flags="v", where=mwhere, quiet=True,
                                  stdout_=PI, stderr_=PI)
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            # best-effort: an empty window just yields '*' placeholders below
            pass
        if incol:
            # Restrict features to those matching the current date value
            try:
                qfeat = pymod.Module("db.select", flags='c', stdout_=PI,
                                     stderr_=PI,
                                     sql="SELECT DISTINCT cat from"
                                     " {vmap} where {dc}='{da}' order by "
                                     "cat".format(vmap=invect, da=data,
                                                  dc=incol))
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(_("db.select returned an error for date "
                                "{da}".format(da=data)))
        if not lines and stdout:
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat, da=data,
                                                 sep=separator)
                for n in range(len(mets)):
                    outtxt += "{sep}{val}".format(val='*', sep=separator)
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    # np.float was removed in NumPy 1.24; use builtin float
                    nvals = np.array(vals[4:]).astype(float)
                except ValueError:
                    # non-numeric values (e.g. '*') -> emit placeholders
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(di=vals[0], da=data,
                                                         sep=separator)
                        for n in range(len(mets)):
                            outtxt += "{sep}{val}".format(val='*',
                                                          sep=separator)
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(di=vals[0], da=data,
                                                     sep=separator)
                for n in range(len(mets)):
                    result = return_value(nvals, mets[n])
                    if stdout:
                        outtxt += "{sep}{val}".format(val=result,
                                                      sep=separator)
                    else:
                        try:
                            if incol:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n],
                                             value=str(result),
                                             where="{dc}='{da}' AND cat="
                                             "{ca}".format(da=data,
                                                           ca=vals[0],
                                                           dc=incol))
                            else:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n],
                                             value=str(result),
                                             where="cat={ca}".format(
                                                 ca=vals[0]))
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
def main(options, flags):
    """Aggregate STRDS raster values sampled at vector points over a time
    window (granularity or explicit start/end dates) and print or store the
    statistics per feature.

    :param options: GRASS parser options dict (input, date_column, date,
        final_date_column, final_date, strds, nprocs, output, columns,
        method, granularity, date_format, separator)
    :param flags: GRASS parser flags dict ('u' update, 'c' create columns,
        'a' aggregate forward in time)

    Fixes applied in review:
    * removed a stray duplicated ``mydates = [indate]`` that unconditionally
      overwrote the ``"start|end"`` entry built for the 'final_date' option,
      silently discarding the user's end date;
    * ``"... <%s> ... <%s>" % invect`` raised ``TypeError`` (two
      placeholders, one argument) -- now formats with ``(invect, invect)``;
    * typo "faild" -> "failed" in the t.rast.what warning.
    """
    # GRASS modules are only importable inside a GRASS session, hence
    # the function-scope imports.
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find("@") != -1:
        invect = invect.split("@")[0]
    incol = options["date_column"]
    indate = options["date"]
    endcol = options["final_date_column"]
    enddate = options["final_date"]
    strds = options["strds"]
    nprocs = options["nprocs"]
    if strds.find("@") != -1:
        strds_name = strds.split("@")[0]
    else:
        strds_name = strds
    output = options["output"]
    if options["columns"]:
        cols = options["columns"].split(",")
    else:
        cols = []
    mets = options["method"].split(",")
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])
    update = flags["u"]
    create = flags["c"]

    # Decide between stdout reporting and attribute-table update
    stdout = False
    if output != "-" and update:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != "-" and create:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == "-" and (update or create):
        if update and not cols:
            gscript.fatal(_("Please set 'columns' option"))
        output = invect
    else:
        stdout = True

    if create:
        # Create one result column per aggregation method
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module(
                    "v.db.addcolumn",
                    map=invect,
                    columns="{col} "
                    "double precision".format(col=colname),
                )
            except CalledModuleError:
                gscript.fatal(
                    _("Not possible to create column "
                      "{col}".format(col=colname)))
        gscript.warning(
            _("Attribute table of vector {name} will be updated"
              "...").format(name=invect))
    elif update:
        # Verify that all requested columns already exist
        colexist = pymod.Module("db.columns", table=invect,
                                stdout_=PI).outputs.stdout.splitlines()
        for col in cols:
            if col not in colexist:
                gscript.fatal(
                    _("Column '{}' does not exist, please create it first".
                      format(col)))
        gscript.warning(
            _("Attribute table of vector {name} will be updated"
              "...").format(name=invect))

    if output != "-" and len(cols) != len(mets):
        gscript.fatal(
            _("'columns' and 'method' options must have the same "
              "number of elements"))

    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

    # Convert the requested granularity into a time window (timedelta for
    # absolute time, plain integer for relative time); td stays None when
    # no granularity is given and explicit end dates are expected instead.
    if sp.get_temporal_type() == "absolute":
        if gran:
            delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
            if tgis.gran_singular_unit(gran) in ["year", "month"]:
                delta = int(tgis.gran_to_gran(gran, "1 day", True))
                td = timedelta(delta)
            elif tgis.gran_singular_unit(gran) == "day":
                delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
                td = timedelta(delta)
            elif tgis.gran_singular_unit(gran) == "hour":
                td = timedelta(hours=delta)
            elif tgis.gran_singular_unit(gran) == "minute":
                td = timedelta(minutes=delta)
            elif tgis.gran_singular_unit(gran) == "second":
                td = timedelta(seconds=delta)
        else:
            td = None
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(
                _("Input granularity is smaller or equal to the {iv}"
                  " STRDS granularity".format(iv=strds)))
        td = int(gran)

    # Collect the dates to process: from vector column(s) or literal value(s)
    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))
    if incol:
        if endcol:
            mysql = "SELECT DISTINCT {dc},{ec} from {vmap} order by " \
                    "{dc}".format(vmap=invect, dc=incol, ec=endcol)
        else:
            mysql = "SELECT DISTINCT {dc} from {vmap} order by " \
                    "{dc}".format(vmap=invect, dc=incol)
        try:
            dates = pymod.Module("db.select", flags="c", stdout_=PI,
                                 stderr_=PI, sql=mysql)
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        if enddate:
            # "start|end" pairs are split again inside the loop below
            mydates = ["{ida}|{eda}".format(ida=indate, eda=enddate)]
        else:
            mydates = [indate]
        # Fixed: a stray `mydates = [indate]` here used to overwrite the
        # enddate branch above, dropping the user's final_date.

    # Ensure the vector map has an attribute table
    pymap = VectorTopo(invect)
    pymap.open("r")
    if len(pymap.dblinks) == 0:
        try:
            pymap.close()
            pymod.Module("v.db.addtable", map=invect)
        except CalledModuleError:
            dbif.close()
            # Fixed: single argument for two %s placeholders raised TypeError
            gscript.fatal(
                _("Unable to add table <%s> to vector map "
                  "<%s>" % (invect, invect)))
    if pymap.is_open():
        pymap.close()

    qfeat = pymod.Module("v.category", stdout_=PI, stderr_=PI,
                         input=invect, option="print")
    myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ""
    for data in mydates:
        try:
            start, final = data.split("|")
        except ValueError:
            start = data
            final = None
        if sp.get_temporal_type() == "absolute":
            fdata = datetime.strptime(start, dateformat)
        else:
            fdata = int(start)
        if final:
            sdata = datetime.strptime(final, dateformat)
        elif flags["a"]:
            # -a aggregates forward from the date
            sdata = fdata + td
        else:
            # otherwise the window ends at the date and extends backward
            sdata = fdata
            fdata = sdata - td
        mwhere = "start_time >= '{inn}' and start_time < " \
                 "'{out}'".format(inn=fdata, out=sdata)
        lines = None
        try:
            r_what = pymod.Module(
                "t.rast.what",
                points=invect,
                strds=strds,
                layout="timerow",
                separator=separator,
                flags="v",
                where=mwhere,
                quiet=True,
                stdout_=PI,
                stderr_=PI,
                nprocs=nprocs,
            )
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            # best-effort: an empty window just yields '*' placeholders below
            gscript.warning("t.rast.what failed with where='{}'".format(mwhere))
        if incol:
            # Restrict features to those matching the current date value(s)
            if endcol:
                mysql = ("SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                         "AND {ec}='{ed}' order by cat".format(vmap=invect,
                                                               da=start,
                                                               dc=incol,
                                                               ed=final,
                                                               ec=endcol))
            else:
                mysql = ("SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                         "order by cat".format(vmap=invect, da=start,
                                               dc=incol))
            try:
                qfeat = pymod.Module("db.select", flags="c", stdout_=PI,
                                     stderr_=PI, sql=mysql)
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(
                    _("db.select returned an error for date "
                      "{da}".format(da=start)))
        if not lines and stdout:
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat, da=start,
                                                 sep=separator)
                for n in range(len(mets)):
                    outtxt += "{sep}{val}".format(val="*", sep=separator)
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    nvals = np.array(vals[3:]).astype(float)
                except ValueError:
                    # non-numeric values (e.g. '*') -> emit placeholders
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(di=vals[0], da=start,
                                                         sep=separator)
                        for n in range(len(mets)):
                            outtxt += "{sep}{val}".format(val="*",
                                                          sep=separator)
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(di=vals[0], da=start,
                                                     sep=separator)
                for n in range(len(mets)):
                    result = None
                    if len(nvals) == 1:
                        result = nvals[0]
                    elif len(nvals) > 1:
                        result = return_value(nvals, mets[n])
                    if stdout:
                        if not result:
                            result = "*"
                        outtxt += "{sep}{val}".format(val=result,
                                                      sep=separator)
                    else:
                        try:
                            if incol:
                                mywhe = "{dc}='{da}' AND ".format(da=start,
                                                                  dc=incol)
                                if endcol:
                                    mywhe += "{dc}='{da}' AND ".format(
                                        da=final, dc=endcol)
                                mywhe += "cat={ca}".format(ca=vals[0])
                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where=mywhe,
                                )
                            else:
                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where="cat={ca}".format(ca=vals[0]),
                                )
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
def create_maps(parsed_obs, offering, seconds_granularity, event_time):
    """Create vector map representing offerings and observed properties.

    For each observed property, writes one point per sensor into a new
    vector map, buckets the observation timestamps into intervals of
    seconds_granularity, creates one attribute table (layer) per non-empty
    interval holding the aggregated value per sensor, and finally registers
    the layers in a space-time vector dataset via create_temporal().

    :param parsed_obs: Observations for a given offering in geoJSON format
    :param offering: A collection of sensors used to conveniently group them up
    :param seconds_granularity: Granularity in seconds
    :param event_time: 'start+offset/end' style time span string -- only the
        parts before '+' and after '/' are used (TODO confirm exact format)
    """
    timestamp_pattern = '%Y-%m-%dT%H:%M:%S'  # TODO: Timezone
    start_time = event_time.split('+')[0]
    epoch_s = int(time.mktime(time.strptime(start_time, timestamp_pattern)))
    end_time = event_time.split('+')[1].split('/')[1]
    epoch_e = int(time.mktime(time.strptime(end_time, timestamp_pattern)))

    for key, observation in parsed_obs.items():
        run_command('g.message',
                    message='Creating vector maps for {}...'.format(key))
        # GRASS map names must not contain ':', '-' or '.'
        map_name = '{}_{}_{}'.format(options['output'], offering, key)
        if ':' in map_name:
            map_name = '_'.join(map_name.split(':'))
        if '-' in map_name:
            map_name = '_'.join(map_name.split('-'))
        if '.' in map_name:
            map_name = '_'.join(map_name.split('.'))

        run_command('t.create', output=map_name, type='stvds',
                    title='Dataset for offering {} and observed '
                          'property {}'.format(offering, key),
                    description='Vector space time dataset')

        free_cat = 1  # next category number to assign to a new sensor point
        points = dict()  # sensor name -> category number
        new = VectorTopo(map_name)
        if overwrite() is True:
            # Best-effort removal of a pre-existing map of the same name;
            # NOTE(review): bare except silently hides unrelated failures.
            try:
                new.remove()
            except:
                pass

        data = json.loads(observation)

        cols = [(u'cat', 'INTEGER PRIMARY KEY'), (u'name', 'VARCHAR'),
                (u'value', 'DOUBLE')]

        # One empty bucket per granularity step across the whole event span
        intervals = {}
        for secondsStamp in range(epoch_s, epoch_e + 1, seconds_granularity):
            intervals.update({secondsStamp: dict()})

        timestamp_pattern = 't%Y%m%dT%H%M%S'  # TODO: Timezone

        for a in data['features']:
            name = a['properties']['name']
            # Write each sensor's point geometry only once
            if a['properties']['name'] not in points.keys():
                if new.is_open() is False:
                    new.open('w')
                points.update({a['properties']['name']: free_cat})
                new.write(Point(*a['geometry']['coordinates']))
                free_cat += 1
            # All remaining properties are timestamp -> value observations
            for timestamp, value in a['properties'].items():
                if timestamp != 'name':
                    # assumes the key carries a 4-char suffix after the
                    # timestamp -- TODO confirm against the parser output
                    observationstart_time = timestamp[:-4]
                    seconds_timestamp = int(time.mktime(
                        time.strptime(observationstart_time,
                                      timestamp_pattern)))
                    # Drop the value into the bucket covering its timestamp
                    for interval in intervals.keys():
                        if interval <= seconds_timestamp < (
                                interval + seconds_granularity):
                            if name in intervals[interval].keys():
                                intervals[interval][name].append(float(value))
                            else:
                                intervals[interval].update(
                                    {name: [float(value)]})
                            break

        if new.is_open():
            new.close(build=False)
            run_command('v.build', map=map_name, quiet=True)

        # One attribute table (layer) per non-empty interval
        i = 1
        layers_timestamps = list()
        for interval in intervals.keys():
            if len(intervals[interval]) != 0:
                timestamp = datetime.datetime.fromtimestamp(interval).strftime(
                    't%Y%m%dT%H%M%S')
                # Table names follow the same character restrictions as maps
                table_name = '{}_{}_{}_{}'.format(options['output'], offering,
                                                  key, timestamp)
                if ':' in table_name:
                    table_name = '_'.join(table_name.split(':'))
                if '-' in table_name:
                    table_name = '_'.join(table_name.split('-'))
                if '.' in table_name:
                    table_name = '_'.join(table_name.split('.'))

                new.open('rw')
                db = '$GISDBASE/$LOCATION_NAME/$MAPSET/sqlite/sqlite.db'
                link = Link(layer=i, name=table_name, table=table_name,
                            key='cat', database=db, driver='sqlite')
                new.dblinks.add(link)
                new.table = new.dblinks[i - 1].table()
                new.table.create(cols)

                i += 1
                layers_timestamps.append(timestamp)

                for name, values in intervals[interval].items():
                    # NOTE(review): aggregated_value is unbound (NameError)
                    # for any method other than 'average'/'sum' -- presumably
                    # the parser restricts method to these two; verify.
                    if options['method'] == 'average':
                        aggregated_value = sum(values) / len(values)
                    elif options['method'] == 'sum':
                        aggregated_value = sum(values)

                    new.table.insert(
                        tuple([points[name], name, aggregated_value]))
                    new.table.conn.commit()

                new.close(build=False)
                run_command('v.build', map=map_name, quiet=True)

        create_temporal(map_name, i, layers_timestamps)
def main():
    """Import GBIF species occurrence data into GRASS vector map(s).

    Reads search parameters from the module-level *options*/*flags*
    dictionaries, queries the GBIF occurrence API through pygbif and writes
    the results either into one output map or into one map per taxon
    (flag -i). The -p/-g/-t/-o flags only print taxon/occurrence info.
    """
    # pygbif is an optional external dependency; fail with guidance
    try:
        from pygbif import occurrences
        from pygbif import species
    except ImportError:
        grass.fatal(_("Cannot import pygbif (https://github.com/sckott/pygbif)"
                      " library."
                      " Please install it (pip install pygbif)"
                      " or ensure that it is on path"
                      " (use PYTHONPATH variable)."))

    # Parse input options
    output = options['output']
    mask = options['mask']
    species_maps = flags['i']
    no_region_limit = flags['r']
    no_topo = flags['b']
    print_species = flags['p']
    print_species_table = flags['t']
    print_species_shell = flags['g']
    print_occ_number = flags['o']
    allow_no_geom = flags['n']
    hasGeoIssue = flags['s']
    taxa_list = options['taxa'].split(',')
    institutionCode = options['institutioncode']
    basisofrecord = options['basisofrecord']
    recordedby = options['recordedby'].split(',')
    date_from = options['date_from']
    date_to = options['date_to']
    country = options['country']
    continent = options['continent']
    rank = options['rank']

    # Define static variable
    # Initialize cat (category counter for written features)
    cat = 0
    # Number of occurrences to fetch in one request
    chunk_size = 300
    # lat/lon proj strings that identify a WGS84 lat/lon LOCATION
    latlon_crs = ['+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0.000,0.000,0.000',
                  '+proj=longlat +no_defs +a=6378137 +rf=298.257223563 +towgs84=0,0,0,0,0,0,0']

    # List attributes available in Darwin Core
    # not all attributes are returned in each request
    # to avoid key errors when accessing the dictionary returned by pygbif
    # presence of DWC keys in the returned dictionary is checked using this list
    # The number of keys in this list has to be equal to the number of columns
    # in the attribute table and the attributes written for each occurrence
    # NOTE(review): 'higerGeography' and 'Habitat' look like typos for the DWC
    # terms 'higherGeography'/'habitat', but they are used consistently here,
    # so they are kept to preserve behavior.
    dwc_keys = ['key', 'taxonRank', 'taxonKey', 'taxonID', 'scientificName',
                'species', 'speciesKey', 'genericName', 'genus', 'genusKey',
                'family', 'familyKey', 'order', 'orderKey', 'class',
                'classKey', 'phylum', 'phylumKey', 'kingdom', 'kingdomKey',
                'eventDate', 'verbatimEventDate', 'startDayOfYear',
                'endDayOfYear', 'year', 'month', 'day', 'occurrenceID',
                'occurrenceStatus', 'occurrenceRemarks', 'Habitat',
                'basisOfRecord', 'preparations', 'sex', 'type', 'locality',
                'verbatimLocality', 'decimalLongitude', 'decimalLatitude',
                'geodeticDatum', 'higerGeography', 'continent', 'country',
                'countryCode', 'stateProvince', 'gbifID', 'protocol',
                'identifier', 'recordedBy', 'identificationID', 'identifiers',
                'dateIdentified', 'modified', 'institutionCode',
                'lastInterpreted', 'lastParsed', 'references', 'relations',
                'catalogNumber', 'occurrenceDetails', 'datasetKey',
                'datasetName', 'collectionCode', 'rights', 'rightsHolder',
                'license', 'publishingOrgKey', 'publishingCountry',
                'lastCrawled', 'specificEpithet', 'facts', 'issues',
                'extensions', 'language']

    # Define columns for attribute table (must match dwc_keys, plus cat and
    # the g_search column holding the search term)
    cols = [('cat', 'INTEGER PRIMARY KEY'),
            ('g_search', 'varchar(100)'),
            ('g_key', 'integer'),
            ('g_taxonrank', 'varchar(50)'),
            ('g_taxonkey', 'integer'),
            ('g_taxonid', 'varchar(50)'),
            ('g_scientificname', 'varchar(255)'),
            ('g_species', 'varchar(255)'),
            ('g_specieskey', 'integer'),
            ('g_genericname', 'varchar(255)'),
            ('g_genus', 'varchar(50)'),
            ('g_genuskey', 'integer'),
            ('g_family', 'varchar(50)'),
            ('g_familykey', 'integer'),
            ('g_order', 'varchar(50)'),
            ('g_orderkey', 'integer'),
            ('g_class', 'varchar(50)'),
            ('g_classkey', 'integer'),
            ('g_phylum', 'varchar(50)'),
            ('g_phylumkey', 'integer'),
            ('g_kingdom', 'varchar(50)'),
            ('g_kingdomkey', 'integer'),
            ('g_eventdate', 'text'),
            ('g_verbatimeventdate', 'varchar(50)'),
            ('g_startDayOfYear', 'integer'),
            ('g_endDayOfYear', 'integer'),
            ('g_year', 'integer'),
            ('g_month', 'integer'),
            ('g_day', 'integer'),
            ('g_occurrenceid', 'varchar(255)'),
            ('g_occurrenceStatus', 'varchar(50)'),
            ('g_occurrenceRemarks', 'varchar(50)'),
            ('g_Habitat', 'varchar(50)'),
            ('g_basisofrecord', 'varchar(50)'),
            ('g_preparations', 'varchar(50)'),
            ('g_sex', 'varchar(50)'),
            ('g_type', 'varchar(50)'),
            ('g_locality', 'varchar(255)'),
            ('g_verbatimlocality', 'varchar(255)'),
            ('g_decimallongitude', 'double precision'),
            ('g_decimallatitude', 'double precision'),
            ('g_geodeticdatum', 'varchar(50)'),
            ('g_higerGeography', 'varchar(255)'),
            ('g_continent', 'varchar(50)'),
            ('g_country', 'varchar(50)'),
            ('g_countryCode', 'varchar(50)'),
            ('g_stateProvince', 'varchar(50)'),
            ('g_gbifid', 'varchar(255)'),
            ('g_protocol', 'varchar(255)'),
            ('g_identifier', 'varchar(50)'),
            ('g_recordedby', 'varchar(255)'),
            ('g_identificationid', 'varchar(255)'),
            ('g_identifiers', 'text'),
            ('g_dateidentified', 'text'),
            ('g_modified', 'text'),
            ('g_institutioncode', 'varchar(50)'),
            ('g_lastinterpreted', 'text'),
            ('g_lastparsed', 'text'),
            ('g_references', 'varchar(255)'),
            ('g_relations', 'text'),
            ('g_catalognumber', 'varchar(50)'),
            ('g_occurrencedetails', 'text'),
            ('g_datasetkey', 'varchar(50)'),
            ('g_datasetname', 'varchar(255)'),
            ('g_collectioncode', 'varchar(50)'),
            ('g_rights', 'varchar(255)'),
            ('g_rightsholder', 'varchar(255)'),
            ('g_license', 'varchar(50)'),
            ('g_publishingorgkey', 'varchar(50)'),
            ('g_publishingcountry', 'varchar(50)'),
            ('g_lastcrawled', 'text'),
            ('g_specificepithet', 'varchar(50)'),
            ('g_facts', 'text'),
            ('g_issues', 'text'),
            ('g_extensions', 'text'),
            ('g_language', 'varchar(50)')]

    set_output_encoding()

    # Set temporal filter if requested by user
    # Initialize eventDate filter
    eventDate = None
    # Check if date from is compatible (ISO compliant)
    if date_from:
        try:
            parse(date_from)
        except (ValueError, OverflowError):
            # BUGFIX: message used to read "Invalid invalid start date"
            grass.fatal("Invalid start date provided")
        if date_from and not date_to:
            eventDate = '{}'.format(date_from)
    # Check if date to is compatible (ISO compliant)
    if date_to:
        try:
            parse(date_to)
        except (ValueError, OverflowError):
            # BUGFIX: message used to read "Invalid invalid end date"
            grass.fatal("Invalid end date provided")
        # BUGFIX: an end date without a start date used to crash with an
        # uncaught ValueError from parse('')
        if not date_from:
            grass.fatal("Invalid date range: Start date is required "
                        "when an end date is given!")
        # Check if date to is after date_from
        if parse(date_from) < parse(date_to):
            eventDate = '{},{}'.format(date_from, date_to)
        else:
            grass.fatal("Invalid date range: End date has to be after start date!")

    # Set filter on basisOfRecord if requested by user
    if basisofrecord == 'ALL':
        basisOfRecord = None
    else:
        basisOfRecord = basisofrecord
    # Allow also occurrences with spatial issues if requested by user
    hasGeospatialIssue = False
    if hasGeoIssue:
        hasGeospatialIssue = True
    # Allow also occurrences without coordinates if requested by user
    hasCoordinate = True
    if allow_no_geom:
        hasCoordinate = False

    # Set reprojection parameters
    # Set target projection of current LOCATION
    target_crs = grass.read_command('g.proj', flags='fj').rstrip(os.linesep)
    # BUGFIX: the original compared the osr.SpatialReference object against
    # this string (always False), so XY locations were never rejected;
    # the check must use the proj string and run before building the SRS
    if target_crs == 'XY location (unprojected)':
        grass.fatal("Sorry, XY locations are not supported!")
    target = osr.SpatialReference(target_crs)
    target.ImportFromProj4(target_crs)

    # Set source projection from GBIF (occurrences are delivered in WGS84)
    source = osr.SpatialReference()
    source.ImportFromEPSG(4326)

    if target_crs not in latlon_crs:
        transform = osr.CoordinateTransformation(source, target)
        reverse_transform = osr.CoordinateTransformation(target, source)

    # Generate WKT polygon to use for spatial filtering if requested
    if mask:
        if len(mask.split('@')) == 2:
            m = VectorTopo(mask.split('@')[0], mapset=mask.split('@')[1])
        else:
            m = VectorTopo(mask)
        if not m.exist():
            grass.fatal('Could not find vector map <{}>'.format(mask))
        m.open('r')
        if not m.is_open():
            grass.fatal('Could not open vector map <{}>'.format(mask))

        # Use map Bbox as spatial filter if map contains <> 1 area
        if m.number_of('areas') == 1:
            region_pol = [area.to_wkt() for area in m.viter("areas")][0]
        else:
            bbox = str(m.bbox()).replace('Bbox(', '').replace(' ',
                                         '').rstrip(')').split(',')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(
                bbox[2], bbox[0], bbox[3], bbox[1])
        m.close()
    else:
        # Do not limit import spatially if LOCATION is able to take global data
        if no_region_limit:
            if target_crs not in latlon_crs:
                # BUGFIX: missing space produced "region isonly supported"
                grass.fatal('Import of data from outside the current region is '
                            'only supported in a WGS84 location!')
            region_pol = None
        else:
            # Limit import spatially to current region
            # if LOCATION is !NOT! able to take global data
            # to avoid projection ERRORS
            region = grass.parse_command('g.region', flags='g')
            region_pol = 'POLYGON(({0} {1}, {0} {3}, {2} {3}, {2} {1}, {0} {1}))'.format(
                region['e'], region['n'], region['w'], region['s'])

    # Do not reproject in latlon LOCATIONS
    if target_crs not in latlon_crs:
        pol = ogr.CreateGeometryFromWkt(region_pol)
        pol.Transform(reverse_transform)
        pol = pol.ExportToWkt()
    else:
        pol = region_pol

    # True when all records go into one single output map
    single_map = (not species_maps and not print_species
                  and not print_species_shell and not print_occ_number
                  and not print_species_table)

    # Create output map if not output maps for each species are requested
    if single_map:
        mapname = output
        new = Vector(mapname)
        new.open('w', tab_name=mapname, tab_cols=cols)
        # BUGFIX: start at 0 (pre-incremented below) so categories are 1,2,3,…
        cat = 0

    # Import data for each species
    for s in taxa_list:
        # Get the taxon key if not the taxon key is provided as input
        try:
            key = int(s)
        except ValueError:
            try:
                species_match = species.name_backbone(s, rank=rank,
                                                      strict=False,
                                                      verbose=True)
                key = species_match['usageKey']
            except Exception:
                grass.error('Data request for taxon {} failed. '
                            'Are you online?'.format(s))
                continue

        # Return matching taxon and alternatives and exit
        # NOTE(review): if a numeric taxon key was given, species_match is
        # undefined here and the print flags would raise NameError
        if print_species:
            print('Matching taxon for {} is:'.format(s))
            print('{} {}'.format(species_match['scientificName'],
                                 species_match['status']))
            if 'alternatives' in list(species_match.keys()):
                print('Alternative matches might be:')
                for m in species_match['alternatives']:
                    print('{} {}'.format(m['scientificName'], m['status']))
            else:
                print('No alternatives found for the given taxon')
            continue
        if print_species_shell:
            print('match={}'.format(species_match['scientificName']))
            if 'alternatives' in list(species_match.keys()):
                alternatives = []
                for m in species_match['alternatives']:
                    alternatives.append(m['scientificName'])
                print('alternatives={}'.format(','.join(alternatives)))
            continue
        if print_species_table:
            if 'alternatives' in list(species_match.keys()):
                if len(species_match['alternatives']) == 0:
                    print('{0}|{1}|{2}|'.format(s, key,
                                                species_match['scientificName']))
                else:
                    alternatives = []
                    for m in species_match['alternatives']:
                        alternatives.append(m['scientificName'])
                    print('{0}|{1}|{2}|{3}'.format(s, key,
                                                   species_match['scientificName'],
                                                   ','.join(alternatives)))
            continue

        # First request only fetches the total count for this search
        try:
            returns_n = occurrences.search(taxonKey=key,
                                           hasGeospatialIssue=hasGeospatialIssue,
                                           hasCoordinate=hasCoordinate,
                                           institutionCode=institutionCode,
                                           basisOfRecord=basisOfRecord,
                                           recordedBy=recordedby,
                                           eventDate=eventDate,
                                           continent=continent,
                                           country=country,
                                           geometry=pol,
                                           limit=1)['count']
        except Exception:
            # BUGFIX: message typo "faild" corrected
            grass.error('Data request for taxon {} failed. '
                        'Are you online?'.format(s))
            returns_n = 0

        # Exit if search does not give a return
        # Print only number of returns for the given search and exit
        if print_occ_number:
            grass.message('Found {0} occurrences for taxon {1}...'.format(
                returns_n, s))
            continue
        elif returns_n <= 0:
            grass.warning('No occurrences for current search for taxon {0}...'.format(s))
            continue
        elif returns_n >= 200000:
            grass.warning('Your search for {1} returns {0} records.\n'
                          'Unfortunately, the GBIF search API is limited to 200,000 records per request.\n'
                          'The download will be incomplete. Please consider to split up your search.'.format(returns_n, s))

        # Get the number of chunks to download
        chunks = int(math.ceil(returns_n / float(chunk_size)))
        grass.verbose('Downloading {0} occurrences for taxon {1}...'.format(
            returns_n, s))

        # Create a map for each species if requested using map name as suffix
        if species_maps:
            mapname = '{}_{}'.format(s.replace(' ', '_'), output)
            new = Vector(mapname)
            new.open('w', tab_name=mapname, tab_cols=cols)
            cat = 0

        # Download the data from GBIF
        for c in range(chunks):
            # Define offset
            offset = c * chunk_size
            # Adjust chunk_size to the hard limit of 200,000 records in GBIF API
            # if necessary
            if offset + chunk_size >= 200000:
                chunk_size = 200000 - offset
            # Get the returns for the next chunk
            returns = occurrences.search(taxonKey=key,
                                         hasGeospatialIssue=hasGeospatialIssue,
                                         hasCoordinate=hasCoordinate,
                                         institutionCode=institutionCode,
                                         basisOfRecord=basisOfRecord,
                                         recordedBy=recordedby,
                                         eventDate=eventDate,
                                         continent=continent,
                                         country=country,
                                         geometry=pol,
                                         limit=chunk_size,
                                         offset=offset)

            # Write the returned data to map and attribute table
            for res in returns['results']:
                if target_crs not in latlon_crs:
                    point = ogr.CreateGeometryFromWkt('POINT ({} {})'.format(
                        res['decimalLongitude'], res['decimalLatitude']))
                    point.Transform(transform)
                    x = point.GetX()
                    y = point.GetY()
                else:
                    x = res['decimalLongitude']
                    y = res['decimalLatitude']

                point = Point(x, y)

                # Fill missing DWC keys with None to avoid KeyErrors below
                for k in dwc_keys:
                    if k not in list(res.keys()):
                        res.update({k: None})

                # BUGFIX: cat used to be incremented a second time after the
                # write, leaving gaps (1, 3, 5, …) in the category numbering
                cat = cat + 1
                # BUGFIX: the ','.join(...) fields are guarded with
                # "if res[...] else None" so back-filled None values no longer
                # raise TypeError (matches the guards on the date fields)
                new.write(point, cat=cat, attrs=(
                    '{}'.format(s),
                    res['key'],
                    res['taxonRank'],
                    res['taxonKey'],
                    res['taxonID'],
                    res['scientificName'],
                    res['species'],
                    res['speciesKey'],
                    res['genericName'],
                    res['genus'],
                    res['genusKey'],
                    res['family'],
                    res['familyKey'],
                    res['order'],
                    res['orderKey'],
                    res['class'],
                    res['classKey'],
                    res['phylum'],
                    res['phylumKey'],
                    res['kingdom'],
                    res['kingdomKey'],
                    '{}'.format(res['eventDate']) if res['eventDate'] else None,
                    '{}'.format(res['verbatimEventDate']) if res['verbatimEventDate'] else None,
                    res['startDayOfYear'],
                    res['endDayOfYear'],
                    res['year'],
                    res['month'],
                    res['day'],
                    res['occurrenceID'],
                    res['occurrenceStatus'],
                    res['occurrenceRemarks'],
                    res['Habitat'],
                    res['basisOfRecord'],
                    res['preparations'],
                    res['sex'],
                    res['type'],
                    res['locality'],
                    res['verbatimLocality'],
                    res['decimalLongitude'],
                    res['decimalLatitude'],
                    res['geodeticDatum'],
                    res['higerGeography'],
                    res['continent'],
                    res['country'],
                    res['countryCode'],
                    res['stateProvince'],
                    res['gbifID'],
                    res['protocol'],
                    res['identifier'],
                    res['recordedBy'],
                    res['identificationID'],
                    ','.join(res['identifiers']) if res['identifiers'] else None,
                    '{}'.format(res['dateIdentified']) if res['dateIdentified'] else None,
                    '{}'.format(res['modified']) if res['modified'] else None,
                    res['institutionCode'],
                    '{}'.format(res['lastInterpreted']) if res['lastInterpreted'] else None,
                    '{}'.format(res['lastParsed']) if res['lastParsed'] else None,
                    res['references'],
                    ','.join(res['relations']) if res['relations'] else None,
                    res['catalogNumber'],
                    '{}'.format(res['occurrenceDetails']) if res['occurrenceDetails'] else None,
                    res['datasetKey'],
                    res['datasetName'],
                    res['collectionCode'],
                    res['rights'],
                    res['rightsHolder'],
                    res['license'],
                    res['publishingOrgKey'],
                    res['publishingCountry'],
                    '{}'.format(res['lastCrawled']) if res['lastCrawled'] else None,
                    res['specificEpithet'],
                    ','.join(res['facts']) if res['facts'] else None,
                    ','.join(res['issues']) if res['issues'] else None,
                    ','.join(res['extensions']) if res['extensions'] else None,
                    res['language'],))

        # Close the current map if a map for each species is requested
        if species_maps:
            new.table.conn.commit()
            new.close()
            if not no_topo:
                grass.run_command('v.build', map=mapname, option='build')

    # Close the output map if not a map for each species is requested
    if single_map:
        new.table.conn.commit()
        new.close()
        if not no_topo:
            grass.run_command('v.build', map=mapname, option='build')