def update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows):
    """Sends the rows of csv data to geostreams

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        geo_csv_header(str): comma separated list of column headers
        geo_rows(list): list of strings that are comma separated column data (list of data rows)
    Notes:
        Header names expected are: 'lat', 'lon', 'dp_time', 'timestamp', 'source',
        'value', and 'trait'
    """
    data = [geo_csv_header]
    data.extend(geo_rows)

    reader = csv.DictReader(data)

    idx = 1
    for row in reader:
        centroid_lonlat = [row['lon'], row['lat']]
        time_fmt = row['dp_time']
        timestamp = row['timestamp']
        dpmetadata = {"source": row['source'], "value": row['value']}
        trait = row['trait']

        idx += 1
        create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                           (centroid_lonlat[1], centroid_lonlat[0]),
                                           time_fmt, time_fmt, dpmetadata, timestamp)
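# Example (sketch): building the geo_csv_header and geo_rows arguments that
# update_geostreams() above expects. The column names come from the docstring;
# the row values below are placeholders, and connector/host/secret_key are
# assumed to come from the calling extractor's context.
geo_csv_header = "lat,lon,dp_time,timestamp,source,value,trait"
geo_rows = [
    "33.075,-111.975,2017-05-01T12:00:00-07:00,2017-05-01,example_source,42.0,canopy_cover"
]
# update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows)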
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    successful_plots = 0
    with open(resource['local_paths'][0], 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {
                "source": row['source'],
                "value": row['value']
            }
            trait = row['trait']

            create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata (%s)" % resource['id'])
    ext_meta = build_metadata(host, self.extractor_info, resource['id'], {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def upload_to_geostreams(file, clowder_id):
    # NOTE: 'host' and 'secret_key' are not parameters here; they must be defined
    # elsewhere (e.g., as module-level globals) for this function to run.
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    successful_plots = 0
    with open(file, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {"source": row['source'], "value": row['value']}
            trait = row['trait']

            create_datapoint_with_dependencies(conn, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.geostreams",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "Geostreams CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
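# Example (sketch): invoking upload_to_geostreams() above from a standalone script.
# The Clowder host URL, key, and file ID shown are placeholders; as written, the
# function reads 'host' and 'secret_key' from module scope.
# host = "https://terraref.ncsa.illinois.edu/clowder/"
# secret_key = "PLACEHOLDER_KEY"
# upload_to_geostreams("geostreams_data.csv", "<clowder_file_id>")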
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    terra_md = resource['metadata']
    ds_info = get_info(connector, host, secret_key, resource['id'])

    # @begin extract_positional_info_from_metadata
    # @in new_dataset_added
    # @out gantry_geometry
    # @end extract_positional_info

    # Get sensor from dataset name
    self.log_info(resource, "Getting position information from metadata")
    (streamprefix, timestamp) = ds_info['name'].split(' - ')
    date = timestamp.split("__")[0]
    scan_time = calculate_scan_time(terra_md)
    streamprefix += " Datasets"
    dpmetadata = {
        "source_dataset": host + ("" if host.endswith("/") else "/") +
                          "datasets/" + resource['id'],
        "dataset_name": ds_info['name']
    }

    centroid = None
    bbox = None
    for entry in terra_md['spatial_metadata']:
        if 'centroid' in terra_md['spatial_metadata'][entry]:
            centroid = terra_md['spatial_metadata'][entry]['centroid']
        if 'bounding_box' in terra_md['spatial_metadata'][entry]:
            bbox = terra_md['spatial_metadata'][entry]['bounding_box']
            bbox = {
                "type": bbox['type'],
                "coordinates": [bbox['coordinates']]
            }

    if 'site_metadata' in terra_md:
        # We've already determined the plot associated with this dataset, so we can skip some work
        self.log_info(resource, "Creating datapoint without lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix, centroid,
                                           scan_time, scan_time, dpmetadata, date, bbox,
                                           terra_md['site_metadata']['sitename'])
    else:
        # We need to do the traditional querying for plot
        self.log_info(resource, "Creating datapoint with lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix, centroid,
                                           scan_time, scan_time, dpmetadata, date, bbox)

    # Attach geometry to Clowder metadata as well
    self.log_info(resource, "Uploading dataset metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"datapoints_added": 1}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() tmp_csv = "canopycovertraits.csv" csv_file = open(tmp_csv, 'w') (fields, traits) = ccCore.get_traits_table() csv_file.write(','.join(map(str, fields)) + '\n') # Get full list of experiment plots using date as filter logging.info(connector) logging.info(host) logging.info(secret_key) logging.info(resource) ds_info = get_info(connector, host, secret_key, resource['parent']['id']) timestamp = ds_info['name'].split(" - ")[1] all_plots = get_site_boundaries(timestamp, city='Maricopa') successful_plots = 0 for plotname in all_plots: bounds = all_plots[plotname] # Use GeoJSON string to clip full field to this plot try: (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds) if len(pxarray.shape) < 3: logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape)) continue ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5) ccVal *= 100.0 # Make 0-100 instead of 0-1 successful_plots += 1 if successful_plots % 10 == 0: logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots))) except: logging.error("error generating cc for %s" % plotname) continue traits['canopy_cover'] = str(ccVal) traits['site'] = plotname traits['local_datetime'] = timestamp + "T12:00:00" trait_list = ccCore.generate_traits_list(traits) csv_file.write(','.join(map(str, trait_list)) + '\n') # Prepare and submit datapoint centroid_lonlat = json.loads( centroid_from_geojson(bounds))["coordinates"] time_fmt = timestamp + "T12:00:00-07:00" dpmetadata = { "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'], "canopy_cover": ccVal } create_datapoint_with_dependencies( connector, host, secret_key, "Canopy Cover", (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt, dpmetadata, timestamp) # submit CSV to BETY csv_file.close() submit_traits(tmp_csv, betykey=self.bety_key) # Add metadata to original dataset indicating this was run ext_meta = build_metadata( host, self.extractor_info, resource['parent']['id'], { "plots_processed": successful_plots, "plots_skipped": len(all_plots) - successful_plots, "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover" }, 'dataset') upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta) self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() tmp_csv = "meantemptraits.csv" csv_file = open(tmp_csv, 'w') (fields, traits) = get_traits_table() csv_file.write(','.join(map(str, fields)) + '\n') # Get full list of experiment plots using date as filter ds_info = get_info(connector, host, secret_key, resource['parent']['id']) dsmd = download_metadata(connector, host, secret_key, resource['parent']['id']) timestamp = ds_info['name'].split(" - ")[1] all_plots = get_site_boundaries(timestamp, city='Maricopa') successful_plots = 0 for plotname in all_plots: bounds = all_plots[plotname] # Use GeoJSON string to clip full field to this plot (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds) #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature # Filter out any pxarray[pxarray < 0] = numpy.nan mean_tc = numpy.nanmean(pxarray) - 273.15 # Create BETY-ready CSV if not numpy.isnan(mean_tc): traits['surface_temperature'] = str(mean_tc) traits['site'] = plotname traits['local_datetime'] = timestamp + "T12:00:00" trait_list = generate_traits_list(traits) #generate_csv(tmp_csv, fields, trait_list) csv_file.write(','.join(map(str, trait_list)) + '\n') # Prepare and submit datapoint centroid_lonlat = json.loads( centroid_from_geojson(bounds))["coordinates"] time_fmt = timestamp + "T12:00:00-07:00" dpmetadata = { "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'], "surface_temperature": str(mean_tc) } create_datapoint_with_dependencies( connector, host, secret_key, "IR Surface Temperature", (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt, dpmetadata, timestamp) successful_plots += 1 # submit CSV to BETY csv_file.close() submit_traits(tmp_csv, betykey=self.bety_key) # Tell Clowder this is completed so subsequent file updates don't daisy-chain metadata = build_metadata( host, self.extractor_info, resource['parent']['id'], { "plots_processed": successful_plots, "plots_skipped": len(all_plots) - successful_plots, "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature" }, 'dataset') upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata) self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        try:
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
            if len(pxarray.shape) < 3:
                logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                continue

            plot_img = create_image(pxarray, "plot_image.png")
            plot_csv = "plot.csv"
            self.generate_table_only(plot_img, plot_csv)
            trait_vals = self.extract_vals_from_csv(plot_csv)

            successful_plots += 1
            if successful_plots % 10 == 0:
                logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots)))
        except:
            logging.error("error generating traits for %s" % plotname)
            continue

        # Create BETY-ready CSV
        (fields, traits) = self.get_traits_table()
        for tr in trait_vals:
            traits[tr] = str(trait_vals[tr])
        traits['site'] = plotname
        traits['local_datetime'] = timestamp + "T12-00-00-000"
        trait_list = self.generate_traits_list(traits)
        self.generate_cc_csv(plot_csv, fields, trait_list)

        # submit CSV to BETY
        submit_traits(plot_csv, self.bety_key)

        # Prepare and submit datapoint
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
        time_fmt = timestamp + "T12:00:00-07:00"
        dpmetadata = {
            "source": host + "files/" + resource['id'],
        }
        for tr in trait_vals:
            dpmetadata[tr] = str(trait_vals[tr])
        create_datapoint_with_dependencies(connector, host, secret_key, "Canopy Cover",
                                           (centroid_lonlat[1], centroid_lonlat[0]),
                                           time_fmt, time_fmt, dpmetadata, timestamp)

        os.remove(plot_img)
        os.remove(plot_csv)

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "plots_processed": successful_plots,
        "plots_skipped": len(all_plots) - successful_plots
        # TODO: add link to BETY trait IDs
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()