def triplify_dataset(input_folder, station_id=32, start_date=20170101, end_date=20171231,
                     output_folder='./rdf/'):
    """Triplify ECA&D daily weather observations for one station into Turtle files."""
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_ECAD)
    triples = []
    files = []
    iFiles = 0
    iTriples = 0
    # 'zips', 'obs', 'maxRecordsPerFile', 'URIBase' and the WebDAV/endpoint settings
    # are expected as module-level configuration.
    for zipf in zips:
        # Build the per-station file name, e.g. 'RR_STAID000032.txt' (id zero-padded to 6 digits).
        fn = '{}_STAID{:06d}.txt'.format(zipf[-6:-4].upper(), station_id)
        with ZipFile(os.path.join(input_folder, zipf), 'r') as zipObj:
            if fn in zipObj.namelist():
                # Extract next to the archive so the subsequent read_csv finds it.
                zipObj.extract(fn, input_folder)
                meteo = pd.read_csv(os.path.join(input_folder, fn), header=None, skiprows=21)
                meteo = meteo.astype(int)
                # Keep only the requested date range (column 2 holds the YYYYMMDD date).
                meteo = meteo[meteo[meteo.columns[2]] >= start_date]
                meteo = meteo[meteo[meteo.columns[2]] <= end_date]
                for index, row in meteo.iterrows():
                    obsv = {}
                    obsv['date'] = "{}-{}-{}T12:00:00".format(str(row[2])[0:4], str(row[2])[4:6],
                                                              str(row[2])[6:])
                    obsv['rs'] = str(row[3] / 10)  # values are stored in tenths of a unit
                    obsv['prop'] = obs[zipf[-6:-4]].get('observableProperty')
                    obsv['sensor'] = 'ECAD_' + str(station_id) + '_' + obs[zipf[-6:-4]].get('sensor')
                    URI = URIBase + 'WeatherObservation/' + "ECAD_" + str(station_id) + "_" + \
                        str(row[2]) + "_" + obsv['prop'] + ">"
                    triplesRow = triplify(obsv, lsTriplesTemplate, URI, URI)
                    triples = triples + triplesRow
                    # Flush to a new Turtle file whenever the buffer grows too large.
                    if len(triples) > maxRecordsPerFile:
                        file = os.path.join(output_folder,
                                            'Weather' + '_' + str(station_id) + '_' + str(start_date) +
                                            "_" + str(end_date) + '_' + str(iFiles) + '.ttl')
                        writeToFile(lsNameSpaces, triples, file)
                        files.append(file)
                        iFiles = iFiles + 1
                        iTriples = iTriples + len(triples)
                        triples = []
    # Write the remaining triples and the station description, then upload everything.
    file = os.path.join(output_folder,
                        'Weather' + '_' + str(station_id) + '_' + str(start_date) + "_" +
                        str(end_date) + '_' + str(iFiles) + '.ttl')
    files.append(file)
    writeToFile(lsNameSpaces, triples, file)
    files.append(triplify_Station(os.path.join(input_folder, 'ECA_blend_station.txt'),
                                  station_id, output_folder))
    for upfile in files:
        upload_webdav.upload_dataset(upfile, webdav_path, endpoint_URL, webdav_URL,
                                     endpoint_login, webdav_login, endpoint_pass, webdav_pass)
    return files
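# Minimal usage sketch (assumptions: the module-level configuration -- 'zips', 'obs',
# 'template_ECAD', 'maxRecordsPerFile', 'URIBase' and the WebDAV/endpoint credentials --
# is already set; the input folder name is illustrative only):
def example_weather_triplification():
    # Produces ./rdf/Weather_32_20170101_20171231_*.ttl plus the station file,
    # and uploads them through upload_webdav.upload_dataset().
    generated = triplify_dataset('./ECAD_data/', station_id=32,
                                 start_date=20170101, end_date=20171231,
                                 output_folder='./rdf/')
    logger.debug(generated)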
def triplify_dataset(output_folder, start_date="2017-04-01", end_date="2017-06-01",
                     keyword="France", collection="Sentinel2"):
    """Triplify Creodias Sentinel product metadata into Turtle files."""
    startEpoch = time.time()
    logger.debug('Triplifying Creodias sentinel metadata')
    query = ('https://finder.creodias.eu/resto/api/collections/{}/search.json'
             '?q={}&startDate={}&completionDate={}&maxRecords={}').format(
                 collection, keyword, start_date + 'T00:00:00.000Z',
                 end_date + 'T00:00:00.000Z', iRecordsPerPage)
    lsMetadataDocs = processQuery(query)
    lsNameSpaces, lsTriplesTemplate = readTemplate(templateFile)
    maxRecordsPerFile = 30000
    triples = []
    iFiles = 0
    iTriples = 0
    iObs = 1
    for m in lsMetadataDocs:
        logger.debug(m)
        iObs = iObs + 1
        uriDummy = URIBase + 'EarthObservation/' + m['id'] + ">"
        # Derive the tile code from the product title and combine instrument and platform
        # into a single sensor identifier.
        m['properties']['tile'] = m['properties']['title'][39:44]
        m['properties']['sensor'] = m["properties"]["instrument"] + "_" + m["properties"]["platform"]
        triplesRow = triplify(m, lsTriplesTemplate, uriDummy, "s" + m['id'])
        triples = triples + triplesRow
        logger.debug('Number of triples: %s', len(triples))
        if len(triples) > maxRecordsPerFile:
            file = os.path.join(output_folder, 'Sentinel' + str(iFiles) + '.ttl')
            writeToFile(lsNameSpaces, triples, file)
            iFiles = iFiles + 1
            iTriples = iTriples + len(triples)
            triples = []
    logger.debug('Number of triples: %s', iTriples)
    file = os.path.join(output_folder, 'Sentinel' + str(iFiles) + '.ttl')
    writeToFile(lsNameSpaces, triples, file)
    endEpoch = time.time()
    elapsedTime = endEpoch - startEpoch
    if elapsedTime < 60:
        logger.debug('Elapsed time: %s seconds', elapsedTime)
    else:
        logger.debug('Elapsed time: %s minutes and %s seconds',
                     math.floor(elapsedTime / 60), elapsedTime % 60)
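# Minimal usage sketch (assumptions: 'iRecordsPerPage', 'templateFile' and 'URIBase'
# are configured at module level and the Creodias finder API is reachable):
def example_sentinel_metadata_triplification():
    # Queries Sentinel-2 products over France for Q2 2017 and writes Sentinel<i>.ttl files.
    triplify_dataset('./rdf/', start_date="2017-04-01", end_date="2017-06-01",
                     keyword="France", collection="Sentinel2")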
def triplify_Station(input_file, station_id=34, output_folder='./rdf/'):
    """Triplify one ECA&D station description (location, sensors, observed properties)."""
    stations = pd.read_csv(input_file, header=None, skiprows=19)
    station = stations.loc[stations[stations.columns[0]] == station_id]
    rdf = {}
    triples = []
    rdf['id'] = 'ECAD_' + str(station_id)
    rdf['alt'] = station.iloc[0, 5]
    rdf['name'] = station.iloc[0, 1].strip()
    rdf['cn'] = station.iloc[0, 2]
    # Coordinates are signed degrees:minutes:seconds strings (e.g. '+47:15:00');
    # strip() guards against leading whitespace in the fixed-width columns.
    latd = station.iloc[0, 3].strip()
    lond = station.iloc[0, 4].strip()
    lat = int(latd[1:3]) + int(latd[4:6]) / 60 + int(latd[7:]) / 3600
    if latd[0:1] == '-':
        lat = lat * -1.0
    lon = int(lond[1:4]) + int(lond[5:7]) / 60 + int(lond[8:]) / 3600
    if lond[0:1] == '-':
        lon = lon * -1.0
    rdf['wkt'] = 'POINT({} {})'.format(lon, lat)
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_Station_ECAD)
    URI = URIBase + 'Station/' + "ECAD_" + str(station_id) + ">"
    triplesRow = triplify(rdf, lsTriplesTemplate, URI, "ECAD_" + str(station_id))
    triples = triples + triplesRow
    # Link the station to one sensor per observed variable defined in the module-level 'obs' dict.
    for k, obsv in obs.items():
        sensorURI = '{}Sensor/ECAD_{}_{}>'.format(URIBase, str(station_id), obsv.get('sensor'))
        propertyURI = '{}ObservableProperty/{}>'.format(URIBase, obsv.get('observableProperty'))
        triples.append([URI, 'sosa:hosts', sensorURI])
        triples.append([sensorURI, 'a', 'wom:Sensor'])
        triples.append([sensorURI, 'sosa:observes', propertyURI])
        triples.append([propertyURI, 'a', 'wom:ObservableProperty'])
        triples.append([propertyURI, 'rdfs:label',
                        '"{}"^^xsd:string'.format(obsv.get('observableProperty'))])
    file = os.path.join(output_folder, 'Station_ECAD' + '_' + str(station_id) + '.ttl')
    writeToFile(lsNameSpaces, triples, file)
    return file
def triplify_dataset(feature_file, raster_file, ftype=None, agent='IRIT', output_folder='./rdf/',
                     threshold1=0, threshold2=0, threshold3=0):
    """Cross a vector layer with a raster and triplify the per-feature class statistics."""
    startEpoch = time.time()
    files = []
    logger.debug('Compute and triplify raster file data')
    logger.debug('Raster file: ' + raster_file)
    logger.debug('Feature file: ' + feature_file)
    triples = []
    iFiles = 0
    iTriples = 0
    rasterfile = rasterio.open(raster_file)
    features = gpd.read_file(feature_file)
    # Check that the bounding boxes intersect (vector box reprojected to the raster CRS).
    bounding_box = features.total_bounds
    fbox = box(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3])
    bounding_box = rasterfile.bounds
    rbox = box(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3])
    project = partial(pyproj.transform, pyproj.Proj(init=features.crs),
                      pyproj.Proj(init=rasterfile.crs))
    fbox = transform(project, fbox)
    if not fbox.intersects(rbox):
        logger.debug("The vector box does not intersect the raster one.")
        return
    file_name = os.path.splitext(os.path.basename(raster_file))[0]
    ffile_name = os.path.splitext(os.path.basename(feature_file))[0]
    # Downscaled quicklook PNG ('scale' is a module-level constant).
    quicklook_fn = os.path.join(output_folder, file_name + '.png')
    quicklook = rasterfile.read(1, out_shape=(1, int(rasterfile.height // scale),
                                              int(rasterfile.width // scale)))
    mpimg.imsave(quicklook_fn, quicklook, cmap='gray')
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_Raster_DS)
    try:
        # Dataset-level metadata is read from the GeoTIFF tags.
        tags = rasterfile.tags()
        raster_start_date = "{}-{}-{}".format(tags.get('Start_date')[0:4],
                                              tags.get('Start_date')[4:6],
                                              tags.get('Start_date')[6:])
        if tags.get('End_date', None) != None:
            raster_end_date = "{}-{}-{}".format(tags.get('End_date')[0:4],
                                                tags.get('End_date')[4:6],
                                                tags.get('End_date')[6:])
        else:
            raster_end_date = None
        product1 = tags.get('Product_id1', None)
        product2 = tags.get('Product_id2', None)
        agent = tags.get('Agent', agent)
        ds = tags.get("Category")
    except:
        logger.debug("Cannot process the files. The metadata is not valid!")
        logger.debug("Please check attribute name and value. Example:")
        logger.debug("Start_date='20170409'")
        logger.debug("Category='LC_DM'")
        logger.debug("Product_id1='S2A_MSIL2A_20170409T105651_N0204_R094_T30TYQ_20170409T110529.SAFE'")
        return None
    raster_ds = {}
    raster_ds['productID1'] = product1
    raster_ds['productID2'] = product2
    raster_ds['agent'] = agent
    raster_ds['bg'] = datetime.strptime(raster_start_date, '%Y-%m-%d').timestamp()
    if raster_end_date != None:
        raster_ds['end'] = datetime.strptime(raster_end_date, '%Y-%m-%d').timestamp()
    raster_ds['crs'] = rasterfile.crs.to_string()
    bounding_box = rasterfile.bounds
    raster_ds['bbox'] = box(bounding_box[0], bounding_box[1], bounding_box[2],
                            bounding_box[3]).wkt
    raster_ds['size'] = os.path.getsize(raster_file)
    raster_ds['creationdate'] = datetime.fromtimestamp(
        os.path.getmtime(raster_file)).strftime('%Y-%m-%dT%H:%M:%S')
    raster_ds['format'] = "application/x-geotiff"
    raster_ds['title'] = file_name
    raster_ds['description'] = "Raster file for " + ds + " - " + agent
    raster_ds['resolution'] = rasterfile.res[0]
    raster_ds['URI'] = ds + "_" + file_name
    raster_ds['quicklook'] = webdav_URL + file_name + '.png'
    URI = URIBase + 'EOAnalysis/' + ds + "_" + file_name + ">"
    triplesRow = triplify(raster_ds, lsTriplesTemplate, URI, ds + "_" + file_name)
    triples = triples + triplesRow
    triples.append([URIBase + 'GFObservedPropertyType/' + ds + '>', 'a',
                    'tom:GFObservedPropertyType'])
    triples.append([URIBase + 'GFObservedPropertyType/' + ds + '>', 'tom:name',
                    '"' + ds + '"^^xsd:string'])
    if ds[0:2] != "LC":
        # Continuous rasters are binned into four ordinal classes.
        labels = ["VeryLow", "Low", "Middle", "High"]
        for i in range(1, 5):
            triples.append([URIBase + 'GFObservedProperty/' + ds + '_' + str(i) + '>',
                            'a', 'tom:GFObservedProperty'])
            triples.append([URIBase + 'GFObservedProperty/' + ds + '_' + str(i) + '>',
                            'tom:hasType', URIBase + 'GFObservedPropertyType/' + ds + '>'])
            triples.append([URIBase + 'GFObservedProperty/' + ds + '_' + str(i) + '>',
                            'tom:name', '"' + labels[i - 1] + "_" + ds + '"^^xsd:string'])
    else:
        # Land-cover rasters: one observed property per distinct class value.
        values = np.unique(rasterfile.read(1))
        for v in values:
            triples.append([URIBase + 'GFObservedProperty/' + ds + '_' + str(v) + '>',
                            'a', 'tom:GFObservedProperty'])
            triples.append([URIBase + 'GFObservedProperty/' + ds + '_' + str(v) + '>',
                            'tom:hasType', URIBase + 'GFObservedPropertyType/' + ds + '>'])
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_Raster)
    if ds[0:2] != "LC":
        band = rasterfile.read(1)
        min_raster = band.min()
        max_raster = band.max()
        logger.debug("Max raster: " + str(max_raster))
        logger.debug("Min raster: " + str(min_raster))
        if threshold1 == 0 and threshold2 == 0 and threshold3 == 0:
            bins = np.linspace(min_raster, max_raster, num=5, endpoint=True)
            logger.debug("Use computed bins: " + str(bins))
        else:
            thresholds = [min_raster, threshold1, threshold2, threshold3, max_raster]
            bins = np.array(thresholds)
            logger.debug("Use custom bins: " + str(bins))
    outside = 0
    toosmall = 0
    featuresize = []
    wgs84 = partial(pyproj.transform, pyproj.Proj(init=features.crs),
                    pyproj.Proj(init='EPSG:4326'))
    if rasterfile.crs.to_string().lower() != features.crs.srs:
        logger.debug("Project features from " + features.crs.srs + " to " +
                     rasterfile.crs.to_string())
        project = partial(pyproj.transform, pyproj.Proj(init=features.crs),
                          pyproj.Proj(init=rasterfile.crs))
    for index, row in features.iterrows():
        feat = {}
        feat['id'] = str(row['id'])
        try:
            if rasterfile.crs.to_string().lower() != features.crs.srs:
                geom = transform(project, row['geometry'])
            else:
                geom = row['geometry']
            featureSem, _ = mask(rasterfile, [geom], all_touched=False, crop=True,
                                 indexes=1, nodata=0)
        except ValueError:
            # Feature falls outside the raster extent.
            outside = outside + 1
            continue
        total = featureSem[featureSem > 0].size
        if total <= 0:
            # Feature is smaller than a pixel: retry the mask with all_touched=True.
            toosmall = toosmall + 1
            featureSem, _ = mask(rasterfile, [geom], all_touched=True, crop=True,
                                 indexes=1, nodata=0)
            total = featureSem[featureSem > 0].size
        if ds[0:2] != "LC":
            # Replace raw pixel values by their bin index.
            for x in np.nditer(featureSem, op_flags=['readwrite']):
                if x[...] > 0:
                    x[...] = int(np.digitize(x, bins))
        counter = Counter(featureSem.ravel())
        feat['type'] = ds
        feat['vector'] = ftype + "_" + os.path.splitext(os.path.basename(feature_file))[0]
        feat['raster'] = ds + "_" + os.path.splitext(os.path.basename(raster_file))[0]
        feat['interval'] = raster_start_date + " " + (
            raster_end_date if raster_end_date != None else raster_start_date)
        rStart = datetime.strptime(raster_start_date, '%Y-%m-%d')
        rEnd = datetime.strptime(raster_end_date, '%Y-%m-%d') if raster_end_date != None else ''
        feat['foi'] = ftype + "_" + feat['id']
        URI = URIBase + 'GFObservationCollection/' + ds + "_" + ftype + "_" + feat['id'] + \
            "_" + str(int(rStart.timestamp())) + "_" + \
            (str(int(rEnd.timestamp())) if raster_end_date != None else '') + ">"
        triplesRow = triplify(feat, lsTriplesTemplate, URI,
                              os.path.splitext(os.path.basename(raster_file))[0])
        triples = triples + triplesRow
        # One GFObservation per class present in the feature, with its surface ratio as result.
        for c in counter:
            if c > 0:
                value = round(counter[c] / total, 2)
                semclass = int(c)
                GFObs = URIBase + 'GFObservation/' + ds + "_" + ftype + "_" + feat['id'] + \
                    "_" + str(semclass) + "_" + str(int(rStart.timestamp())) + "_" + \
                    (str(int(rEnd.timestamp())) if raster_end_date != None else '') + ">"
                triples.append([GFObs, 'a', 'tom:GFObservation'])
                triples.append([URI, 'sosa:hasMember', GFObs])
                triples.append([GFObs, 'sosa:hasSimpleResult',
                                '"' + str(value) + '"^^xsd:float'])
                triples.append([GFObs, 'sosa:observedProperty',
                                URIBase + "GFObservedProperty/" + ds + "_" + str(semclass) + ">"])
        if len(triples) > maxRecordsPerFile:
            fn = os.path.join(output_folder,
                              'Obs_' + ds + "_" + ftype + "_" + ffile_name + "_" +
                              raster_start_date + "_" +
                              (raster_end_date if raster_end_date != None else '') +
                              "_" + str(iFiles) + '.ttl')
            logger.debug("Writing file: " + fn)
            writeToFile(lsNameSpaces, triples, fn)
            files.append(fn)
            iFiles = iFiles + 1
            iTriples = iTriples + len(triples)
            triples = []
    fn = os.path.join(output_folder,
                      'Obs_' + ds + "_" + ftype + "_" + ffile_name + "_" +
                      raster_start_date + "_" +
                      (raster_end_date if raster_end_date != None else '') +
                      "_" + str(iFiles) + '.ttl')
    logger.debug("Writing file: " + fn)
    files.append(fn)
    writeToFile(lsNameSpaces, triples, fn)
    iTriples = iTriples + len(triples)
    logger.debug('Number of triples: ' + str(iTriples))
    # Triplify the vector features themselves through the companion module.
    files2 = unit.triplify_dataset(feature_file, ftype, output_folder)
    upload_webdav.upload_webdav(quicklook_fn, webdav_URL, webdav_login, webdav_pass)
    for upfile in files + files2:
        upload_webdav.upload_dataset(upfile, webdav_path, endpoint_URL, webdav_URL,
                                     endpoint_login, webdav_login, endpoint_pass, webdav_pass)
    logger.debug(files + [quicklook_fn])
    return files + files2 + [quicklook_fn]
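# Minimal usage sketch for the raster/vector crossing above (assumptions: the GeoTIFF
# carries the expected tags -- Start_date, Category, Product_id1, ... -- and the
# file names below are illustrative only):
def example_raster_observation_triplification():
    # Intersects each parcel of the shapefile with the raster, bins the pixel values,
    # writes Obs_*.ttl files and uploads them together with the quicklook PNG.
    generated = triplify_dataset('./data/parcels.shp', './data/LAI_20170409.tif',
                                 ftype='Parcel', agent='IRIT',
                                 output_folder='./rdf/')
    logger.debug(generated)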
def triplify_dataset(feature_file, feature_type, output_folder='./rdf/'):
    """Triplify a vector dataset: one Dataset/GeoFeatureType record plus one GeoFeature per geometry."""
    startEpoch = time.time()
    logger.debug('Triplifying features information')
    triples = []
    files = []
    iFiles = 0
    iTriples = 0
    ds = {}
    file_name = os.path.splitext(os.path.basename(feature_file))[0]
    features = gpd.read_file(feature_file)
    # Prepare a reprojection to WGS84 if the features are not already in EPSG:4326.
    project_flag = 0
    if features.crs.srs != "epsg:4326":
        project = partial(pyproj.transform, pyproj.Proj(init=features.crs),
                          pyproj.Proj(init='EPSG:4326'))
        project_flag = 1
    bounding_box = features.total_bounds
    ds['bbox'] = box(bounding_box[0], bounding_box[1], bounding_box[2], bounding_box[3]).wkt
    ds['crs'] = features.crs.srs[5:]
    ds['size'] = os.path.getsize(feature_file)
    ds['creationdate'] = datetime.fromtimestamp(
        os.path.getmtime(feature_file)).strftime('%Y-%m-%dT%H:%M:%S')
    ds['format'] = "application/octet-stream"
    if os.path.splitext(feature_file)[1] == ".geojson":
        ds['format'] = "json"
    ds['title'] = file_name
    ds['description'] = "Vector file for " + feature_type
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_vector_DS)
    URI = URIBase + 'Dataset/' + feature_type + "_" + file_name + ">"
    triplesRow = triplify(ds, lsTriplesTemplate, URI, feature_type + "_" + file_name)
    triples = triples + triplesRow
    triples.append([URIBase + 'GeoFeatureType/' + feature_type + '>', 'a', 'tom:GeoFeatureType'])
    triples.append([URIBase + 'GeoFeatureType/' + feature_type + '>', 'tom:name',
                    '"' + feature_type + '"^^xsd:string'])
    lsNameSpaces, lsTriplesTemplate = readTemplate(template_vector)
    for index, row in features.iterrows():
        if row['geometry'] is not None:
            feat = {}
            feat['id'] = str(row.get('id', 'null'))
            feat['wkt'] = row['geometry']
            if project_flag:
                feat['wkt'] = transform(project, feat['wkt'])
            feat['type'] = feature_type
            URI = URIBase + 'GeoFeature/' + feature_type + "_" + feat['id'] + ">"
            triplesRow = triplify(feat, lsTriplesTemplate, URI, feature_type + "_" + feat['id'])
            triples = triples + triplesRow
            if len(triples) > maxRecordsPerFile:
                file = os.path.join(output_folder,
                                    feature_type + '_' + datetime.now().strftime("%m-%d-%Y-%H-%M") +
                                    '_' + str(iFiles) + '.ttl')
                logger.debug("Writing file: " + file)
                writeToFile(lsNameSpaces, triples, file)
                files.append(file)
                iFiles = iFiles + 1
                iTriples = iTriples + len(triples)
                triples = []
    file = os.path.join(output_folder,
                        feature_type + '_' + datetime.now().strftime("%m-%d-%Y-%H-%M") +
                        '_' + str(iFiles) + '.ttl')
    files.append(file)
    logger.debug("Writing file: " + file)
    logger.debug('Number of triples: ' + str(iTriples))
    writeToFile(lsNameSpaces, triples, file)
    return files
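# Minimal usage sketch (assumptions: 'maxRecordsPerFile', 'URIBase' and the Turtle
# templates are configured at module level; the shapefile path is illustrative only):
def example_vector_triplification():
    # Writes one GeoFeature per non-empty geometry, split into Parcel_<timestamp>_<i>.ttl files.
    generated = triplify_dataset('./data/parcels.shp', 'Parcel', output_folder='./rdf/')
    logger.debug(generated)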