def check_message(self, connector, host, secret_key, resource, parameters):
    # First, check if we have the correct sensor type
    md = download_metadata(connector, host, secret_key, resource['parent']['id'])
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    sensortype = self.determineSensorType(ds_info['name'])
    if sensortype in ["ndvi", "pri"]:
        if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
            logging.info("skipping dataset %s, already processed" % resource['id'])
            return CheckMessage.ignore

        # Check if output already exists
        timestamp = ds_info['name'].split(" - ")[1]
        out_file = self.get_sensor_path(timestamp, opts=['extracted_values'])
        if os.path.isfile(out_file) and not self.overwrite:
            logging.info("skipping %s, outputs already exist" % resource['id'])
            return CheckMessage.ignore

        return CheckMessage.download
    else:
        return CheckMessage.ignore
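# Hedged sketch (illustration only, never called by the extractors): dataset
# names throughout these methods are assumed to follow the convention
# "<sensor display name> - <YYYY-MM-DD__HH-MM-SS...>", which is why
# split(" - ")[1] yields the timestamp and string slices yield date parts.
# The sample name below is fabricated.
def _timestamp_convention_sketch():
    ds_name = "Full Field - 2017-05-01__10-00-00-000"
    sensor_name, timestamp = ds_name.split(" - ")
    date = timestamp.split("__")[0]                              # "2017-05-01"
    year, month, day = timestamp[:4], timestamp[5:7], timestamp[8:10]
    return sensor_name, date, (year, month, day)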
def process_message_individual(self, connector, host, secret_key, resource, parameters):
    """Deprecated method that operates on a single capture, not a field mosaic."""
    self.start_message()

    input_image = resource['local_paths'][0]

    # Create output in the same directory as the input, but check the name
    ds_md = get_info(connector, host, secret_key, resource['parent']['id'])
    terra_md = get_terraref_metadata(
        download_metadata(connector, host, secret_key, resource['parent']['id']), 'stereoTop')
    dataset_name = ds_md['name']
    timestamp = dataset_name.split(" - ")[1]

    # Is this the left or right half?
    side = 'left' if resource['name'].find("_left") > -1 else 'right'
    gps_bounds = geojson_to_tuples(terra_md['spatial_metadata'][side]['bounding_box'])
    out_csv = self.sensors.create_sensor_path(timestamp, opts=[side], ext='csv')
    out_dgci = out_csv.replace(".csv", "_dgci.png")
    out_edge = out_csv.replace(".csv", "_edge.png")
    out_label = out_csv.replace(".csv", "_label.png")
    out_dgci_tif = out_dgci.replace('.png', '.tif')
    out_edge_tif = out_edge.replace('.png', '.tif')
    out_label_tif = out_label.replace('.png', '.tif')

    self.generate_all_outputs(input_image, out_csv, out_dgci, out_edge, out_label, gps_bounds)

    fileids = []
    for file_to_upload in [out_csv, out_dgci_tif, out_edge_tif, out_label_tif]:
        if os.path.isfile(file_to_upload):
            if file_to_upload not in resource['local_paths']:
                # TODO: Should this be written to a separate dataset?
                # target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                #                                       self.sensors.get_display_name(),
                #                                       timestamp[:4], timestamp[5:7], timestamp[8:10],
                #                                       leaf_ds_name=dataset_name)

                # Send output to the Clowder source dataset
                fileids.append(upload_to_dataset(connector, host, secret_key,
                                                 resource['parent']['id'], file_to_upload))
                self.created += 1
                self.bytes += os.path.getsize(file_to_upload)

    # Add metadata to the original dataset indicating this extractor was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'],
                              {"files_created": fileids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    inPath = resource['local_paths'][0]

    # Determine output file path
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    out_file = self.create_sensor_path(timestamp, opts=['extracted_values'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    # Extract NDVI values
    if not os.path.isfile(out_file) or self.overwrite:
        logging.info("...writing values to: %s" % out_file)
        # Read the raw sensor file and parse the NDVI value from its fixed byte offset
        # (the original code closed the bytes object instead of the file handle)
        with open(inPath, "rb") as f:
            data = f.read()
        values = float(data[49:66])

        with open(out_file, 'wb') as csvfile:
            fields = ['file_name', 'NDVI']  # field names for the CSV file
            wr = csv.DictWriter(csvfile, fieldnames=fields, lineterminator='\n')
            wr.writeheader()
            wr.writerow({'file_name': resource['name'], 'NDVI': values})

        # TODO: Send this to geostreams

        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
        uploaded_file_ids.append(fileid)

        self.created += 1
        self.bytes += os.path.getsize(out_file)
    else:
        logging.info("%s already exists; skipping %s" % (out_file, resource['id']))

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, metadata)

    self.end_message()
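# Hedged sketch of the fixed-offset parse in process_message above: the raw
# sensor file is assumed to carry an ASCII float at bytes 49-65. The sample
# buffer is fabricated; .decode() is needed on Python 3, where float() does
# not accept bytes.
def _fixed_offset_parse_sketch():
    data = b"\x00" * 49 + b"0.723100000000000" + b"\x00" * 8
    ndvi = float(data[49:66].decode())
    return ndvi  # 0.7231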
def check_message_individual(self, connector, host, secret_key, resource, parameters):
    """Deprecated method that operates on a single capture, not a field mosaic."""
    ds_md = get_info(connector, host, secret_key, resource['parent']['id'])

    s = Sensors('', 'ua-mac', 'rgb_geotiff')
    if ds_md['name'].find(s.get_display_name()) > -1:
        timestamp = ds_md['name'].split(" - ")[1]
        side = 'left' if resource['name'].find("_left") > -1 else 'right'
        out_csv = self.sensors.get_sensor_path(timestamp, opts=[side], ext='csv')

        if not os.path.exists(out_csv) or self.overwrite:
            return CheckMessage.download
        else:
            logging.info("output file already exists; skipping %s" % resource['id'])

    return CheckMessage.ignore
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    terra_md = resource['metadata']
    ds_info = get_info(connector, host, secret_key, resource['id'])

    # @begin extract_positional_info_from_metadata
    # @in new_dataset_added
    # @out gantry_geometry
    # @end extract_positional_info

    # Get sensor from dataset name
    self.log_info(resource, "Getting position information from metadata")
    (streamprefix, timestamp) = ds_info['name'].split(' - ')
    date = timestamp.split("__")[0]
    scan_time = calculate_scan_time(terra_md)
    streamprefix += " Datasets"
    dpmetadata = {
        "source_dataset": host + ("" if host.endswith("/") else "/") +
                          "datasets/" + resource['id'],
        "dataset_name": ds_info['name']
    }

    centroid = None
    bbox = None
    for entry in terra_md['spatial_metadata']:
        if 'centroid' in terra_md['spatial_metadata'][entry]:
            centroid = terra_md['spatial_metadata'][entry]['centroid']
        if 'bounding_box' in terra_md['spatial_metadata'][entry]:
            bbox = terra_md['spatial_metadata'][entry]['bounding_box']
            bbox = {
                "type": bbox['type'],
                "coordinates": [bbox['coordinates']]
            }

    if 'site_metadata' in terra_md:
        # We've already determined the plot associated with this dataset, so we can skip some work
        self.log_info(resource, "Creating datapoint without lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix,
                                           centroid, scan_time, scan_time, dpmetadata,
                                           date, bbox, terra_md['site_metadata']['sitename'])
    else:
        # We need to do the traditional querying for the plot
        self.log_info(resource, "Creating datapoint with lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix,
                                           centroid, scan_time, scan_time, dpmetadata,
                                           date, bbox)

    # Attach geometry to Clowder metadata as well
    self.log_info(resource, "Uploading dataset metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"datapoints_added": 1}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
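# Hedged sketch of the bounding-box rewrap above: the sensor metadata is
# assumed to store a bare ring of positions, while a GeoJSON Polygon expects
# a list of rings, hence the extra wrapping list. Coordinates are fabricated.
def _bbox_rewrap_sketch():
    bbox = {
        "type": "Polygon",
        "coordinates": [[-111.97, 33.07], [-111.97, 33.08], [-111.96, 33.08],
                        [-111.96, 33.07], [-111.97, 33.07]]
    }
    return {
        "type": bbox['type'],
        "coordinates": [bbox['coordinates']]  # wrap the ring in a rings list
    }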
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    tmp_csv = "canopycovertraits.csv"
    csv_file = open(tmp_csv, 'w')
    (fields, traits) = ccCore.get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    # Get full list of experiment plots using date as filter
    logging.info(connector)
    logging.info(host)
    logging.info(secret_key)
    logging.info(resource)
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        try:
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
            if len(pxarray.shape) < 3:
                logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                continue

            ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5)
            ccVal *= 100.0  # Scale to 0-100 instead of 0-1
            successful_plots += 1
            if successful_plots % 10 == 0:
                logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots)))
        except:
            logging.error("error generating cc for %s" % plotname)
            continue

        traits['canopy_cover'] = str(ccVal)
        traits['site'] = plotname
        traits['local_datetime'] = timestamp + "T12:00:00"
        trait_list = ccCore.generate_traits_list(traits)
        csv_file.write(','.join(map(str, trait_list)) + '\n')

        # Prepare and submit datapoint
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
        time_fmt = timestamp + "T12:00:00-07:00"
        dpmetadata = {
            "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
            "canopy_cover": ccVal
        }
        create_datapoint_with_dependencies(connector, host, secret_key, "Canopy Cover",
                                           (centroid_lonlat[1], centroid_lonlat[0]),
                                           time_fmt, time_fmt, dpmetadata, timestamp)

    # Submit CSV to BETY
    csv_file.close()
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Add metadata to the original dataset indicating this extractor was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "plots_processed": successful_plots,
        "plots_skipped": len(all_plots) - successful_plots,
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover"
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Write the CSV to the same directory as the source file
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    rootdir = self.sensors.create_sensor_path(timestamp, sensor="rgb_fullfield", ext=".csv")
    out_csv = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_bety.csv"))
    out_geo = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_geo.csv"))

    # TODO: What should happen if the CSV already exists? If we're here, there's no completed metadata...

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time',
                             'source', 'value', 'timestamp']) + '\n')

    # Get full list of experiment plots using date as filter
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    self.log_info(resource, "found %s plots on %s" % (len(all_plots), timestamp))
    successful_plots = 0
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        try:
            pxarray = clip_raster(resource['local_paths'][0], tuples)
            if pxarray is not None:
                if len(pxarray.shape) < 3:
                    self.log_error(resource, "unexpected array shape for %s (%s)" %
                                   (plotname, pxarray.shape))
                    continue

                ccVal = calculate_canopycover_masked(rollaxis(pxarray, 0, 3))

                if (ccVal > -1):
                    # Prepare and submit datapoint
                    geo_file.write(','.join([plotname,
                                             'Canopy Cover',
                                             str(centroid_lonlat[1]),
                                             str(centroid_lonlat[0]),
                                             time_fmt,
                                             host + ("" if host.endswith("/") else "/") +
                                             "files/" + resource['id'],
                                             str(ccVal),
                                             timestamp]) + '\n')

                    successful_plots += 1
                    if successful_plots % 10 == 0:
                        self.log_info(resource, "processed %s/%s plots" %
                                      (successful_plots, len(all_plots)))
            else:
                continue
        except:
            self.log_error(resource, "error generating cc for %s" % plotname)
            continue

        if (ccVal > -1):
            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

    csv_file.close()
    geo_file.close()

    # Upload these CSVs to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Add metadata to the original file indicating this extractor was run
    self.log_info(resource, "updating file metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": [fileid, geoid]}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Trigger separate extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    out_csv = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", opts=["bety"])
    out_geo = out_csv.replace("_bety", "_geo")

    # TODO: What should happen if the CSV already exists? If we're here, there's no completed metadata...

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time',
                             'source', 'value', 'timestamp']) + '\n')

    successful_plots = 0
    nan_plots = 0
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        pxarray = clip_raster(resource['local_paths'][0], tuples)

        # Filter out any negative (no-data) values before averaging
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            geo_file.write(','.join([plotname,
                                     'IR Surface Temperature',
                                     str(centroid_lonlat[1]),
                                     str(centroid_lonlat[0]),
                                     time_fmt,
                                     host + ("" if host.endswith("/") else "/") +
                                     "files/" + resource['id'],
                                     str(mean_tc),
                                     timestamp]) + '\n')

            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')
        else:
            nan_plots += 1

        successful_plots += 1

    self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

    csv_file.close()
    geo_file.close()

    # Upload CSVs to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    self.log_info(resource, "updating file metadata")
    metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "total_plots": len(all_plots),
        "plots_processed": successful_plots,
        "blank_plots": nan_plots,
        "files_created": [fileid, geoid],
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    # Trigger downstream extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
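# Hedged sketch of the temperature aggregation above: negative raw values are
# treated as no-data and masked to NaN, and the plot mean is converted from
# Kelvin to Celsius with nanmean. The array contents are fabricated.
def _mean_temperature_sketch():
    pxarray = numpy.array([[300.2, 301.1], [-1.0, 299.8]])
    pxarray[pxarray < 0] = numpy.nan           # mask no-data pixels
    mean_tc = numpy.nanmean(pxarray) - 273.15  # Kelvin -> Celsius
    return mean_tc  # ~27.22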
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    tmp_csv = "meantemptraits.csv"
    csv_file = open(tmp_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    dsmd = download_metadata(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
        #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature

        # Filter out any negative (no-data) values before averaging
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            #generate_csv(tmp_csv, fields, trait_list)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                "surface_temperature": str(mean_tc)
            }
            create_datapoint_with_dependencies(connector, host, secret_key,
                                               "IR Surface Temperature",
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)

        successful_plots += 1

    # Submit CSV to BETY
    csv_file.close()
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "plots_processed": successful_plots,
        "plots_skipped": len(all_plots) - successful_plots,
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        try:
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
            if len(pxarray.shape) < 3:
                logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                continue

            plot_img = create_image(pxarray, "plot_image.png")
            plot_csv = "plot.csv"
            self.generate_table_only(plot_img, plot_csv)
            trait_vals = self.extract_vals_from_csv(plot_csv)

            successful_plots += 1
            if successful_plots % 10 == 0:
                logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots)))
        except:
            logging.error("error generating traits for %s" % plotname)
            continue

        # Create BETY-ready CSV
        (fields, traits) = self.get_traits_table()
        for tr in trait_vals:
            traits[tr] = str(trait_vals[tr])
        traits['site'] = plotname
        traits['local_datetime'] = timestamp + "T12-00-00-000"
        trait_list = self.generate_traits_list(traits)
        self.generate_cc_csv(plot_csv, fields, trait_list)

        # Submit CSV to BETY
        submit_traits(plot_csv, self.bety_key)

        # Prepare and submit datapoint
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
        time_fmt = timestamp + "T12:00:00-07:00"
        dpmetadata = {
            "source": host + "files/" + resource['id'],
        }
        for tr in trait_vals:
            dpmetadata[tr] = str(trait_vals[tr])
        create_datapoint_with_dependencies(connector, host, secret_key, "Canopy Cover",
                                           (centroid_lonlat[1], centroid_lonlat[0]),
                                           time_fmt, time_fmt, dpmetadata, timestamp)

        os.remove(plot_img)
        os.remove(plot_csv)

    # Add metadata to the original dataset indicating this extractor was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "plots_processed": successful_plots,
        "plots_skipped": len(all_plots) - successful_plots
        # TODO: add link to BETY trait IDs
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()