def update_dataset_extractor_metadata(self, connector, host, key, dsid, metadata, extractor_name):
    """Adds or replaces existing dataset metadata for the specified extractor

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        key(str): used with the host API
        dsid(str): the dataset to update
        metadata(str): the metadata string to update the dataset with
        extractor_name(str): the name of the extractor to associate the metadata with
    """
    meta = build_metadata(host, self.extractor_info, dsid, metadata, "dataset")

    try:
        md = ds.download_metadata(connector, host, key, dsid, extractor_name)
        md_len = len(md)
    except Exception as ex:     # pylint: disable=broad-except
        md_len = 0
        logging.debug(ex.message)

    if md_len > 0:
        ds.remove_metadata(connector, host, key, dsid, extractor_name)

    ds.upload_metadata(connector, host, key, dsid, meta)
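# A minimal, hypothetical usage sketch (not part of the original source): inside an extractor's
# process_message(), the helper above could be used to record a small status payload against the
# current dataset. The payload keys shown here are illustrative only.
#
#     def process_message(self, connector, host, secret_key, resource, parameters):
#         status_md = {"files_created": [], "note": "no outputs generated"}
#         self.update_dataset_extractor_metadata(connector, host, secret_key, resource['id'],
#                                                status_md, self.extractor_info['name'])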
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    successful_plots = 0
    with open(resource['local_paths'][0], 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {
                "source": row['source'],
                "value": row['value']
            }
            trait = row['trait']

            create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata (%s)" % resource['id'])
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"plots_processed": successful_plots}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    with open(resource['local_paths'][0], 'r') as inputcsv:
        inputlines = inputcsv.readlines()

    if len(inputlines) <= 1:
        # First check if there is data besides the header line
        self.log_info(resource, "no trait lines found in CSV; skipping upload")
    else:
        # Submit CSV to BETY
        self.log_info(resource, "found %s trait lines; submitting CSV to bety" % str(len(inputlines) - 1))
        submit_traits(resource['local_paths'][0], betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata (%s)" % resource['id'])
        ext_meta = build_metadata(host, self.extractor_info, resource['id'], {}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def upload_to_geostreams(file, clowder_id):
    # `host` and `secret_key` are assumed to be defined at module level alongside this helper
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    successful_plots = 0
    with open(file, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {"source": row['source'], "value": row['value']}
            trait = row['trait']

            create_datapoint_with_dependencies(conn, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.geostreams",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "Geostreams CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {"plots_processed": successful_plots}, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    f = resource['local_paths'][0]

    self.log_info(resource, "determining image quality")
    qual = getImageQuality(f)

    self.log_info(resource, "creating output image")
    md = download_ds_metadata(connector, host, secret_key, resource['parent']['id'])
    terramd = get_terraref_metadata(md)
    if "left" in f:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['left']['bounding_box'])
    else:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['right']['bounding_box'])
    output = f.replace(".tif", "_nrmac.tif")

    create_geotiff(np.array([[qual, qual], [qual, qual]]), bounds, output)
    upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['parent']['id'], output)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'], {"quality_score": qual}, 'file')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message_individual(self, connector, host, secret_key, resource, parameters):
    """Deprecated method that operates on a single capture rather than the field mosaic."""
    self.start_message()

    input_image = resource['local_paths'][0]

    # Create output in same directory as input, but check name
    ds_md = get_info(connector, host, secret_key, resource['parent']['id'])
    terra_md = get_terraref_metadata(download_metadata(connector, host, secret_key,
                                                       resource['parent']['id']), 'stereoTop')
    dataset_name = ds_md['name']
    timestamp = dataset_name.split(" - ")[1]

    # Is this left or right half?
    side = 'left' if resource['name'].find("_left") > -1 else 'right'
    gps_bounds = geojson_to_tuples(terra_md['spatial_metadata'][side]['bounding_box'])
    out_csv = self.sensors.create_sensor_path(timestamp, opts=[side], ext='csv')
    out_dgci = out_csv.replace(".csv", "_dgci.png")
    out_edge = out_csv.replace(".csv", "_edge.png")
    out_label = out_csv.replace(".csv", "_label.png")
    out_dgci_tif = out_dgci.replace('.png', '.tif')
    out_edge_tif = out_edge.replace('.png', '.tif')
    out_label_tif = out_label.replace('.png', '.tif')

    self.generate_all_outputs(input_image, out_csv, out_dgci, out_edge, out_label, gps_bounds)

    fileids = []
    for file_to_upload in [out_csv, out_dgci_tif, out_edge_tif, out_label_tif]:
        if os.path.isfile(file_to_upload):
            if file_to_upload not in resource['local_paths']:
                # TODO: Should this be written to a separate dataset?
                #target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                #                                      self.sensors.get_display_name(),
                #                                      timestamp[:4], timestamp[5:7], timestamp[8:10],
                #                                      leaf_ds_name=dataset_name)

                # Send output to Clowder source dataset
                fileids.append(upload_to_dataset(connector, host, secret_key,
                                                 resource['parent']['id'], file_to_upload))
            self.created += 1
            self.bytes += os.path.getsize(file_to_upload)

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'],
                              {"files_created": fileids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    inPath = resource['local_paths'][0]

    # Determine output file path
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    out_file = self.create_sensor_path(timestamp, opts=['extracted_values'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    # Extract NDVI values
    if not os.path.isfile(out_file) or self.overwrite:
        logging.info("...writing values to: %s" % out_file)
        # Read the raw sensor file via a file handle so it is closed properly
        with open(inPath, "rb") as datafile:
            data = datafile.read()
        values = float(data[49:66])

        with open(out_file, 'wb') as csvfile:
            fields = ['file_name', 'NDVI']  # field names for the CSV file
            wr = csv.DictWriter(csvfile, fieldnames=fields, lineterminator='\n')
            wr.writeheader()
            wr.writerow({'file_name': resource['name'], 'NDVI': values})

        # TODO: Send this to geostreams

        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
        uploaded_file_ids.append(fileid)

        self.created += 1
        self.bytes += os.path.getsize(out_file)
    else:
        logging.info("%s already exists; skipping %s" % (out_file, resource['id']))

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Put files alongside .nc file
    out_dir = os.path.dirname(resource['local_paths'][0])
    out_fname_root = resource['name'].replace('.nc', '')

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.cdl')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in cdl format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--cdl', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.xml')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in xml format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--xml', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.json')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in json format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--jsn', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

    # Add json metadata to original netCDF file
    with open(metaFilePath, 'r') as metajson:
        metadata = build_metadata(host, self.extractor_info, resource['id'], json.load(metajson), 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    self.end_message()
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")

        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info, resource['id'], clean_md, 'dataset')
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key, resource['id'], complete_md)
                    return CheckMessage.download

    return CheckMessage.ignore
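# For context, a sketch of the pyclowder contract this check_message() participates in (my reading of the
# framework, not taken from this source): the extractor base class calls check_message() first, and only
# downloads the dataset files and invokes process_message() when CheckMessage.download is returned;
# CheckMessage.ignore skips the message. The class name below is hypothetical.
#
#     class HyperspectralRawConverter(TerrarefExtractor):
#         def check_message(self, connector, host, secret_key, resource, parameters):
#             ...  # as above; returns CheckMessage.download or CheckMessage.ignore
#
#         def process_message(self, connector, host, secret_key, resource, parameters):
#             ...  # only reached after check_message() returned CheckMessage.download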
def upload_to_bety(file, clowder_id):
    # `host`, `secret_key`, and `bety_key` are assumed to be defined at module level alongside this helper
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    submit_traits(file, betykey=bety_key)

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.betydb",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "BETYdb CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id,
                              {"betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/v1/variables?name=canopy_cover"},
                              'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
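# Minimal, hypothetical driver for the two standalone uploaders above (upload_to_geostreams and
# upload_to_bety). It assumes this module also defines `host`, `secret_key`, and `bety_key`, and that the
# Clowder file ID of the CSV is already known; argument names are illustrative only.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Re-send a plot-level CSV to Geostreams and BETYdb")
    parser.add_argument("csv_path", help="path to the trait CSV on disk")
    parser.add_argument("clowder_file_id", help="Clowder ID of the CSV file the metadata should attach to")
    args = parser.parse_args()

    upload_to_geostreams(args.csv_path, args.clowder_file_id)
    upload_to_bety(args.csv_path, args.clowder_file_id)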
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    for p in resource['local_paths']:
        if p.endswith(".bin"):
            input_dir = p.replace(os.path.basename(p), '')
            # TODO: Eventually light may be in separate location
            input_dir_light = input_dir

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    out_name_base = self.sensors.create_sensor_path(timestamp, ext='')
    uploaded_file_ids = []

    subprocess.call(["octave --eval \"PSII('%s','%s','%s')\"" % (input_dir, input_dir_light, out_name_base)],
                    shell=True)

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    for out_file in ["_Fm_dark", "_Fv_dark", "_FvFm_dark", "_Fm_light", "_Fv_light", "_FvFm_light",
                     "_Phi_PSII", "_NPQ", "_qN", "_qP", "_Rfd"]:
        full_out_name = out_name_base + out_file + ".png"
        if os.path.isfile(full_out_name) and full_out_name not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, full_out_name)
            uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(full_out_name)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, "ps2Top")
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['local_paths']:
            if f.endswith(format_ind + '.bin'):
                frames[ind] = f
    if None in [metadata] or len(frames) < 101:
        self.log_error(resource, 'could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    (img_width, img_height) = self.get_image_dimensions(metadata)
    gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])

    self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

    png_frames = {}
    # skip 0101.bin since frame 101 is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        tif_path = png_path.replace(".png", ".tif")
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            self.log_info(resource, "generating and uploading %s" % png_path)
            pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
            create_image(pixels, png_path)
            create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    self.log_info(resource, "generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        # TODO: Coerce histogram and pseudocolor to geotiff?
        self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Write the CSV to the same directory as the source file
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    rootdir = self.sensors.create_sensor_path(timestamp, sensor="rgb_fullfield", ext=".csv")
    out_csv = os.path.join(os.path.dirname(rootdir), resource['name'].replace(".tif", "_canopycover_bety.csv"))
    out_geo = os.path.join(os.path.dirname(rootdir), resource['name'].replace(".tif", "_canopycover_geo.csv"))

    # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...
    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

    # Get full list of experiment plots using date as filter
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    self.log_info(resource, "found %s plots on %s" % (len(all_plots), timestamp))
    successful_plots = 0
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        try:
            pxarray = clip_raster(resource['local_paths'][0], tuples)
            if pxarray is not None:
                if len(pxarray.shape) < 3:
                    self.log_error(resource, "unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                    continue

                ccVal = calculate_canopycover_masked(rollaxis(pxarray, 0, 3))

                if (ccVal > -1):
                    # Prepare and submit datapoint
                    geo_file.write(','.join([plotname,
                                             'Canopy Cover',
                                             str(centroid_lonlat[1]),
                                             str(centroid_lonlat[0]),
                                             time_fmt,
                                             host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                             str(ccVal),
                                             timestamp]) + '\n')

                successful_plots += 1
                if successful_plots % 10 == 0:
                    self.log_info(resource, "processed %s/%s plots" % (successful_plots, len(all_plots)))
            else:
                continue
        except:
            self.log_error(resource, "error generating cc for %s" % plotname)
            continue

        if (ccVal > -1):
            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

    csv_file.close()
    geo_file.close()

    # Upload this CSV to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": [fileid, geoid]}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Trigger separate extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    out_csv = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", opts=["bety"])
    out_geo = out_csv.replace("_bety", "_geo")

    # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...
    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

    successful_plots = 0
    nan_plots = 0
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        pxarray = clip_raster(resource['local_paths'][0], tuples)

        # Filter out any negative values before computing the mean
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            geo_file.write(','.join([plotname,
                                     'IR Surface Temperature',
                                     str(centroid_lonlat[1]),
                                     str(centroid_lonlat[0]),
                                     time_fmt,
                                     host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                     str(mean_tc),
                                     timestamp]) + '\n')

            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')
        else:
            nan_plots += 1

        successful_plots += 1

    self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

    # Submit CSV to BETY
    csv_file.close()
    geo_file.close()

    # Upload CSVs to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    self.log_info(resource, "updating file metadata")
    metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "total_plots": len(all_plots),
        "plots_processed": successful_plots,
        "blank_plots": nan_plots,
        "files_created": [fileid, geoid],
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    # Trigger downstream extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key, resource, parameters)

    self.start_message(resource)

    # Get left/right files and metadata
    process_files = []
    if not self.get_terraref_metadata is None:
        process_files = find_terraref_files(resource)
    else:
        process_files = find_image_files(self.args.identify_binary, resource, self.file_infodata_file_ending)

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    # Prepare for processing files
    timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
    target_dsid = resource['id']
    uploaded_file_ids = []
    ratios = []

    try:
        for one_file in process_files:
            mask_source = one_file

            # Make sure the source image is in the correct EPSG space
            epsg = get_epsg(one_file)
            if epsg != self.default_epsg:
                self.log_info(resource, "Reprojecting from " + str(epsg) + " to default " + str(self.default_epsg))
                _, tmp_name = tempfile.mkstemp()
                src = gdal.Open(one_file)
                gdal.Warp(tmp_name, src, dstSRS='EPSG:' + str(self.default_epsg))
                mask_source = tmp_name

            # Get the bounds of the image to see if we can process it. Also get the mask filename
            rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

            if bounds is None:
                self.log_skip(resource, "Skipping non-georeferenced image: " + os.path.basename(one_file))
                if mask_source != one_file:
                    os.remove(mask_source)
                continue

            if not file_exists(rgb_mask_tif) or self.overwrite:
                self.log_info(resource, "creating %s" % rgb_mask_tif)

                mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                ratios.append(mask_ratio)

                # Bands must be reordered to avoid swapping R and B
                mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)

                create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info,
                               self.get_terraref_metadata)
                compress_geotiff(rgb_mask_tif)

                # Remove any temporary file
                if mask_source != one_file:
                    os.remove(mask_source)

                self.created += 1
                self.bytes += os.path.getsize(rgb_mask_tif)

            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, rgb_mask_tif,
                                                  remove=self.overwrite)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % rgb_mask_tif)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                           target_dsid, rgb_mask_tif)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if not self.get_terraref_metadata is None:
            ratios_len = len(ratios)
            left_ratio = (ratios[0] if ratios_len > 0 else None)
            right_ratio = (ratios[1] if ratios_len > 1 else None)

            md = {"files_created": uploaded_file_ids}
            if not left_ratio is None:
                md["left_mask_ratio"] = left_ratio
            if not self.leftonly and not right_ratio is None:
                md["right_mask_ratio"] = right_ratio

            extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
            self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if not sensor_old_base is None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    self.start_message(resource)
    super(ClipByShape, self).process_message(connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Initialize local variables
    dataset_name = parameters["datasetname"]
    season_name, experiment_name = "Unknown Season", "Unknown Experiment"
    datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

    # Array containing the links to uploaded files
    uploaded_file_ids = []

    # Find the files we're interested in
    # pylint: disable=line-too-long
    (shapefile, shxfile, dbffile, imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                                            resource['triggering_file'])
    # pylint: enable=line-too-long
    if shapefile is None:
        self.log_skip(resource, "No shapefile found")
        return
    if shxfile is None:
        self.log_skip(resource, "No SHX file found")
        return
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    try:
        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        # Get timestamps. Also get season and experiment information for Clowder collections
        datestamp = self.find_datestamp(dataset_name)
        timestamp = timestamp_to_terraref(self.find_timestamp(dataset_name))
        (season_name, experiment_name, _) = self.get_season_and_experiment(datestamp, self.sensor_name)

        if self.experiment_metadata:
            if 'extractors' in self.experiment_metadata:
                extractor_json = self.experiment_metadata['extractors']
                if 'shapefile' in extractor_json:
                    if 'plot_column_name' in extractor_json['shapefile']:
                        plot_name_idx = extractor_json['shapefile']['plot_column_name']

        # Check our current local variables
        if dbffile is None:
            self.log_info(resource, "DBF file not found, using default plot naming")
        self.log_info(resource, "Extracting plots using shapefile '" + os.path.basename(shapefile) + "'")

        # Load the shapes and find the plot name column if we have a DBF file
        shape_in = ogr.Open(shapefile)
        layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
        feature = layer.GetNextFeature()
        layer_ref = layer.GetSpatialRef()

        if dbffile:
            shape_table = DBF(dbffile, lowernames=True, ignore_missing_memofile=True)
            shape_rows = iter(list(shape_table))

            # Make sure if we have the column name of plot-names specified that it exists in
            # the shapefile
            column_names = shape_table.field_names
            if not plot_name_idx is None:
                if not find_all_plot_names(plot_name_idx, column_names):
                    ValueError(resource, "Shapefile data does not have specified plot name" +
                               " column '" + plot_name_idx + "'")

            # Lookup a plot name field to use
            if plot_name_idx is None:
                for one_name in column_names:
                    # pylint: disable=line-too-long
                    if one_name == "observationUnitName":
                        plot_name_idx = one_name
                        break
                    elif (one_name.find('plot') >= 0) and ((one_name.find('name') >= 0) or one_name.find('id')):
                        plot_name_idx = one_name
                        break
                    elif one_name == 'id':
                        plot_name_idx = one_name
                        break
                    # pylint: enable=line-too-long
            if plot_name_idx is None:
                ValueError(resource, "Shapefile data does not have a plot name field '" +
                           os.path.basename(dbffile) + "'")

        # Setup for the extracted plot images
        plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + " (By Plot)"

        # Loop through each polygon and extract plot level data
        alternate_plot_id = 0
        while feature:
            # Current geometry to extract
            plot_poly = feature.GetGeometryRef()
            if layer_ref:
                plot_poly.AssignSpatialReference(layer_ref)
            plot_spatial_ref = plot_poly.GetSpatialReference()

            # Determine the plot name to use
            plot_name = None
            alternate_plot_id = alternate_plot_id + 1
            if shape_rows and plot_name_idx:
                try:
                    row = next(shape_rows)
                    plot_name = get_plot_name(plot_name_idx, row)
                except StopIteration:
                    pass
            if not plot_name:
                plot_name = "plot_" + str(alternate_plot_id)

            # Determine output dataset name
            leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
            self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                          (season_name, experiment_name, plot_display_name,
                           datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_dataset))

            # Create the dataset, even if we have no data to put in it, so that the caller knows
            # it was addressed
            target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user,
                                                        self.clowder_pass, self.clowderspace,
                                                        season_name, experiment_name, plot_display_name,
                                                        datestamp[:4], datestamp[5:7], datestamp[8:10],
                                                        leaf_ds_name=leaf_dataset)

            # Loop through all the images looking for overlap
            for filename in imagefiles:
                # Get the bounds. We also get the reference systems in case we need to convert
                # between them
                bounds = imagefiles[filename]['bounds']
                bounds_spatial_ref = bounds.GetSpatialReference()

                # Checking for geographic overlap and skip if there is none
                if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                    # We need to convert coordinate system before an intersection
                    transform = osr.CoordinateTransformation(bounds_spatial_ref, plot_spatial_ref)
                    new_bounds = bounds.Clone()
                    if new_bounds:
                        new_bounds.Transform(transform)
                        intersection = plot_poly.Intersection(new_bounds)
                        new_bounds = None
                else:
                    # Same coordinate system. Simple intersection
                    intersection = plot_poly.Intersection(bounds)

                if intersection.GetArea() == 0.0:
                    self.log_info(resource, "Skipping image: " + filename)
                    continue

                # Determine where we're putting the clipped file on disk and determine overwrite
                # pylint: disable=unexpected-keyword-arg
                out_file = self.sensors.create_sensor_path(timestamp, filename=os.path.basename(filename),
                                                           plot=plot_name, subsensor=self.sensor_name)
                if (file_exists(out_file) and not self.overwrite):
                    # The file exists and we don't want to overwrite it
                    self.logger.warn("Skipping existing output file: %s", out_file)
                    continue

                self.log_info(resource, "Attempting to clip '" + filename +
                              "' to polygon number " + str(alternate_plot_id))

                # Create destination folder on disk if we haven't done that already
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                # Clip the raster
                bounds_tuple = polygon_to_tuples_transform(plot_poly, bounds_spatial_ref)

                clip_pix = clip_raster(filename, bounds_tuple, out_path=out_file)
                if clip_pix is None:
                    self.log_error(resource, "Failed to clip image to plot name " + plot_name)
                    continue

                # Upload the clipped image to the dataset
                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                      remove=self.overwrite)
                if not found_in_dest or self.overwrite:
                    image_name = os.path.basename(filename)
                    content = {
                        "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                        "imageName": image_name
                    }
                    if image_name in image_ids:
                        content['imageID'] = image_ids[image_name]

                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, out_file)
                    uploaded_file_ids.append(fileid)

                    # Generate our metadata
                    meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                    clowder_file.upload_metadata(connector, host, secret_key, fileid, meta)
                else:
                    self.logger.warn("Skipping existing file in dataset: %s", out_file)

                self.created += 1
                self.bytes += os.path.getsize(out_file)

            # Get the next shape to extract
            feature = layer.GetNextFeature()

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        id_len = len(uploaded_file_ids)
        if id_len > 0 or self.created > 0:
            extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                          {"files_created": uploaded_file_ids}, 'dataset')
            self.log_info(resource, "Uploading shapefile plot extractor metadata to Level_2 dataset: " +
                          str(extractor_md))
            clowder_dataset.remove_metadata(connector, host, secret_key, resource['id'],
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        else:
            self.logger.warn("Skipping dataset metadata updating since no files were loaded")

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if not sensor_old_base is None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop',
                                                                                 terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                  (season_name, experiment_name, self.sensors.get_display_name(),
                   timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid,
                                      {"files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    terra_md = resource['metadata']
    ds_info = get_info(connector, host, secret_key, resource['id'])

    # @begin extract_positional_info_from_metadata
    # @in new_dataset_added
    # @out gantry_geometry
    # @end extract_positional_info

    # Get sensor from datasetname
    self.log_info(resource, "Getting position information from metadata")
    (streamprefix, timestamp) = ds_info['name'].split(' - ')
    date = timestamp.split("__")[0]
    scan_time = calculate_scan_time(terra_md)
    streamprefix += " Datasets"
    dpmetadata = {
        "source_dataset": host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id'],
        "dataset_name": ds_info['name']
    }

    centroid = None
    bbox = None
    for entry in terra_md['spatial_metadata']:
        if 'centroid' in terra_md['spatial_metadata'][entry]:
            centroid = terra_md['spatial_metadata'][entry]['centroid']
        if 'bounding_box' in terra_md['spatial_metadata'][entry]:
            bbox = terra_md['spatial_metadata'][entry]['bounding_box']
            bbox = {
                "type": bbox['type'],
                "coordinates": [bbox['coordinates']]
            }

    if 'site_metadata' in terra_md:
        # We've already determined the plot associated with this dataset so we can skip some work
        self.log_info(resource, "Creating datapoint without lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix, centroid,
                                           scan_time, scan_time, dpmetadata, date, bbox,
                                           terra_md['site_metadata']['sitename'])
    else:
        # We need to do the traditional querying for plot
        self.log_info(resource, "Creating datapoint with lookup in %s" % streamprefix)
        create_datapoint_with_dependencies(connector, host, secret_key, streamprefix, centroid,
                                           scan_time, scan_time, dpmetadata, date, bbox)

    # Attach geometry to Clowder metadata as well
    self.log_info(resource, "Uploading dataset metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'], {"datapoints_added": 1}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    # pylint: disable=global-statement
    global SENSOR_NAME
    global FIELD_NAME_LIST

    self.start_message(resource)
    super(PlotExtractor, self).process_message(connector, host, secret_key, resource, parameters)

    # Initialize local variables
    dataset_name = resource["name"]
    experiment_name = "Unknown Experiment"
    datestamp = None
    citation_auth_override, citation_title_override, citation_year_override = None, None, None
    config_specie = None

    # Initialize data writing overrides. We have some reverse logic here due to the intent of
    # the variables
    store_in_geostreams = True if not hasattr(configuration, "NEVER_WRITE_GEOSTREAMS") \
        else not getattr(configuration, "NEVER_WRITE_GEOSTREAMS")
    store_in_betydb = True if not hasattr(configuration, "NEVER_WRITE_BETYDB") \
        else not getattr(configuration, "NEVER_WRITE_BETYDB")
    create_csv_files = True if not hasattr(configuration, "NEVER_WRITE_CSV") \
        else not getattr(configuration, "NEVER_WRITE_CSV")
    out_geo = None
    out_csv = None

    # Find the files we're interested in
    imagefiles = self.find_image_files(resource['local_paths'])
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Setup overrides and get the restore function
    restore_fn = self.setup_overrides(host, secret_key, resource)
    if not restore_fn:
        self.end_message(resource)
        return

    try:
        # Get the best timestamp
        timestamp = terraref_timestamp_to_iso(self.find_timestamp(resource['dataset_info']['name']))
        if 'T' in timestamp:
            datestamp = timestamp.split('T')[0]
        else:
            datestamp = timestamp
            timestamp += 'T12:00:00'
        if timestamp.find('T') > 0 and timestamp.rfind('-') > 0 and timestamp.find('T') < timestamp.rfind('-'):
            # Convert to local time. We can do this due to site definitions having
            # the time offsets as part of their definition
            localtime = timestamp[0:timestamp.rfind('-')]
        else:
            localtime = timestamp

        _, experiment_name, _ = self.get_season_and_experiment(timestamp_to_terraref(timestamp),
                                                               self.sensor_name)

        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        file_filters = self.get_file_filters()

        if self.experiment_metadata:
            extractor_json = self.find_extractor_json()
            if extractor_json:
                if 'citationAuthor' in extractor_json:
                    citation_auth_override = extractor_json['citationAuthor']
                if 'citationYear' in extractor_json:
                    citation_year_override = extractor_json['citationYear']
                if 'citationTitle' in extractor_json:
                    citation_title_override = extractor_json['citationTitle']
                if 'noGeostreams' in extractor_json:
                    store_in_geostreams = False
                if 'noBETYdb' in extractor_json:
                    store_in_betydb = False
                if 'noCSV' in extractor_json:
                    create_csv_files = False
            if 'germplasmName' in self.experiment_metadata:
                config_specie = self.experiment_metadata['germplasmName']

        # Create the output files
        rootdir = self.sensors.create_sensor_path(timestamp, sensor=SENSOR_NAME, ext=".csv",
                                                  opts=[experiment_name])
        (bety_fields, bety_traits) = get_bety_traits_table()
        (geo_fields, geo_traits) = get_geo_traits_table()

        if create_csv_files:
            out_geo = os.path.splitext(rootdir)[0] + "_" + SENSOR_NAME + "_geo.csv"
            self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
            out_csv = os.path.splitext(rootdir)[0] + "_" + SENSOR_NAME + ".csv"
            self.log_info(resource, "Writing Shapefile CSV to %s" % out_csv)

        # Setup default trait values
        if not config_specie is None:
            bety_traits['species'] = config_specie
        if not citation_auth_override is None:
            bety_traits['citation_author'] = citation_auth_override
        if not citation_title_override is None:
            bety_traits['citation_title'] = citation_title_override
        if not citation_year_override is None:
            bety_traits['citation_year'] = citation_year_override
        else:
            bety_traits['citation_year'] = datestamp[:4]

        bety_csv_header = ','.join(map(str, bety_fields))
        geo_csv_header = ','.join(map(str, geo_fields))

        # Loop through all the images (of which there should be one - see above)
        geo_rows = []
        bety_rows = []
        len_field_value = len(FIELD_NAME_LIST)
        for filename in imagefiles:
            # Check if we're filtering files
            if file_filters:
                if not file_filtered_in(filename, file_filters):
                    continue

            try:
                calc_value = ""

                # Load the pixels
                clip_pix = np.array(gdal.Open(filename).ReadAsArray())

                # Get additional, necessary data
                centroid = imagefiles[filename]["bounds"].Centroid()
                plot_name = _get_plot_name([resource['dataset_info']['name'], dataset_name])

                calc_value = calculate(np.rollaxis(clip_pix, 0, 3))

                # Convert to something iterable that's in the correct order
                if isinstance(calc_value, set):
                    raise RuntimeError("A 'set' type of data was returned and isn't supported. "
                                       "Please use a list or a tuple instead")
                elif isinstance(calc_value, dict):
                    # Assume the dictionary is going to have field names with their values
                    # We check whether we have the correct number of fields later. This also
                    # filters out extra fields
                    values = []
                    for key in FIELD_NAME_LIST:
                        if key in calc_value:
                            values.append(calc_value[key])
                elif not isinstance(calc_value, (list, tuple)):
                    values = [calc_value]
                else:
                    # Assumed fix: the original left lists/tuples unhandled, which would leave
                    # `values` unset; pass them through unchanged
                    values = list(calc_value)

                # Sanity check our values
                len_calc_value = len(values)
                if not len_calc_value == len_field_value:
                    raise RuntimeError("Incorrect number of values returned. Expected " +
                                       str(len_field_value) + " and received " + str(len_calc_value))

                # Prepare the data for writing
                image_clowder_id = ""
                image_name = os.path.basename(filename)
                if image_name in image_ids:
                    image_clowder_id = image_ids[image_name]
                geo_traits['site'] = plot_name
                geo_traits['lat'] = str(centroid.GetY())
                geo_traits['lon'] = str(centroid.GetX())
                geo_traits['dp_time'] = localtime
                geo_traits['source'] = host.rstrip('/') + '/files/' + str(image_clowder_id)
                geo_traits['timestamp'] = datestamp

                # Write the data points geographically and otherwise
                for idx in range(0, len_field_value):
                    # The way the code is configured, Geostreams can only handle one field
                    # at a time so we write out one row per field/value pair
                    geo_traits['trait'] = FIELD_NAME_LIST[idx]
                    geo_traits['value'] = str(values[idx])
                    trait_list = generate_traits_list(geo_fields, geo_traits)
                    csv_data = ','.join(map(str, trait_list))
                    if out_geo:
                        self.write_csv_file(resource, out_geo, geo_csv_header, csv_data)
                    if store_in_geostreams:
                        geo_rows.append(csv_data)

                    # BETYdb can handle wide rows with multiple values so we just set the field
                    # values here and write the single row after the loop
                    bety_traits[FIELD_NAME_LIST[idx]] = str(values[idx])

                bety_traits['site'] = plot_name
                bety_traits['local_datetime'] = localtime
                trait_list = generate_traits_list(bety_fields, bety_traits)
                csv_data = ','.join(map(str, trait_list))
                if out_csv:
                    self.write_csv_file(resource, out_csv, bety_csv_header, csv_data)
                if store_in_betydb:
                    bety_rows.append(csv_data)

            except Exception as ex:
                self.log_error(resource, "error generating " + EXTRACTOR_NAME + " for %s" % plot_name)
                self.log_error(resource, "    exception: %s" % str(ex))
                continue

            # Only process the first file that's valid
            if num_image_files > 1:
                self.log_info(resource, "Multiple image files were found, only using first found")
            break

        # Upload any geostreams or betydb data
        if store_in_geostreams:
            if geo_rows:
                update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows)
            else:
                self.log_info(resource, "No geostreams data was generated to upload")

        if store_in_betydb:
            if bety_rows:
                update_betydb(bety_csv_header, bety_rows)
            else:
                self.log_info(resource, "No BETYdb data was generated to upload")

        # Update this dataset with the extractor info
        dataset_id = self.get_dataset_id(host, secret_key, resource, dataset_name)
        try:
            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            self.log_info(resource, "updating dataset metadata")
            content = {
                "comment": "Calculated " + SENSOR_NAME + " index",
                SENSOR_NAME + " value": calc_value
            }
            if self.experiment_metadata:
                content.update(prepare_pipeline_metadata(self.experiment_metadata))
            extractor_md = build_metadata(host, self.extractor_info, dataset_id, content, 'dataset')
            clowder_dataset.remove_metadata(connector, host, secret_key, dataset_id,
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, dataset_id, extractor_md)
        except Exception as ex:
            self.log_error(resource, "Exception updating dataset metadata: " + str(ex))

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if restore_fn:
            restore_fn()
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) # Get left/right files and metadata img_left, img_right, metadata = None, None, None for fname in resource['local_paths']: if fname.endswith('_dataset_metadata.json'): all_dsmd = load_json_file(fname) metadata = get_terraref_metadata(all_dsmd, 'stereoTop') elif fname.endswith('_left.bin'): img_left = fname elif fname.endswith('_right.bin'): img_right = fname if None in [img_left, img_right, metadata]: self.log_error( "could not locate each of left+right+metadata in processing") raise ValueError( "could not locate each of left+right+metadata in processing") # Determine output location & filenames timestamp = resource['dataset_info']['name'].split(" - ")[1] left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left']) right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right']) uploaded_file_ids = [] self.log_info(resource, "determining image shapes & gps bounds") left_shape = bin2tiff.get_image_shape(metadata, 'left') right_shape = bin2tiff.get_image_shape(metadata, 'right') left_gps_bounds = geojson_to_tuples( metadata['spatial_metadata']['left']['bounding_box']) right_gps_bounds = geojson_to_tuples( metadata['spatial_metadata']['right']['bounding_box']) out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8')) target_dsid = build_dataset_hierarchy( host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, self.sensors.get_display_name(), timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp) if (not os.path.isfile(left_tiff)) or self.overwrite: self.log_info(resource, "creating & uploading %s" % left_tiff) left_image = bin2tiff.process_image(left_shape, img_left, None) # Rename output.tif after creation to avoid long path errors create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata) # TODO: we're moving zero byte files shutil.move(out_tmp_tiff, left_tiff) if left_tiff not in resource['local_paths']: fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) else: self.log_info( resource, "file found in dataset already; not re-uploading") self.created += 1 self.bytes += os.path.getsize(left_tiff) if (not os.path.isfile(right_tiff)) or self.overwrite: self.log_info(resource, "creating & uploading %s" % right_tiff) right_image = bin2tiff.process_image(right_shape, img_right, None) create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata) shutil.move(out_tmp_tiff, right_tiff) if right_tiff not in resource['local_paths']: fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) else: self.log_info( resource, "file found in dataset already; not re-uploading") self.created += 1 self.bytes += os.path.getsize(right_tiff) # Tell Clowder this is completed so subsequent file updates don't daisy-chain ext_meta = build_metadata(host, self.extractor_info, resource['id'], {"files_created": uploaded_file_ids}, 'dataset') self.log_info(resource, "uploading extractor metadata") upload_metadata(connector, host, secret_key, resource['id'], ext_meta) # Upload original Lemnatec metadata to new Level_1 dataset md = 
get_terraref_metadata(all_dsmd) md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id'] lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset') self.log_info(resource, "uploading LemnaTec metadata") upload_metadata(connector, host, secret_key, target_dsid, lemna_md) self.end_message(resource)
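# Note: the host + ("" if host.endswith("/") else "/") + "files/" + fileid pattern above is
# repeated for every upload. A small helper along these lines (hypothetical, shown only as an
# illustrative sketch, not part of the pipeline) would remove the duplication:
def _clowder_url(host, resource_type, resource_id):
    """Join a Clowder host and a resource path without doubling the slash."""
    return host.rstrip("/") + "/" + resource_type + "/" + resource_id

# e.g. uploaded_file_ids.append(_clowder_url(host, "files", fileid))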
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) # Build list of JSON files json_files = [] for f in resource['files']: if f['filename'].endswith("_environmentlogger.json"): if f['filepath'].startswith("/home/clowder"): json_files.append(f['filepath'].replace( "/home/clowder", "/home/extractor")) else: json_files.append(f['filepath']) json_files.sort() # Determine full output path timestamp = resource['name'].split(" - ")[1] out_fullday_netcdf = self.sensors.create_sensor_path(timestamp) temp_out_full = os.path.join(os.path.dirname(out_fullday_netcdf), "temp_full.nc") temp_out_single = temp_out_full.replace("_full.nc", "_single.nc") geo_csv = out_fullday_netcdf.replace(".nc", "_geo.csv") if not file_exists(temp_out_full): for json_file in json_files: self.log_info( resource, "converting %s to netCDF & appending" % os.path.basename(json_file)) ela.mainProgramTrigger(json_file, temp_out_single) cmd = "ncrcat --record_append %s %s" % (temp_out_single, temp_out_full) subprocess.call([cmd], shell=True) os.remove(temp_out_single) shutil.move(temp_out_full, out_fullday_netcdf) self.created += 1 self.bytes += os.path.getsize(out_fullday_netcdf) # Write out geostreams.csv if not file_exists(geo_csv): self.log_info(resource, "writing geostreams CSV") geo_file = open(geo_csv, 'w') geo_file.write(','.join([ 'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp' ]) + '\n') with Dataset(out_fullday_netcdf, "r") as ncdf: streams = set([ sensor_info.name for sensor_info in ncdf.variables.values() if sensor_info.name.startswith('sensor') ]) for stream in streams: if stream != "sensor_spectrum": try: memberlist = ncdf.get_variables_by_attributes( sensor=stream) for members in memberlist: data_points = _produce_attr_dict(members) for index in range(len(data_points)): dp_obj = data_points[index] if dp_obj["sensor"] == stream: time_format = "%Y-%m-%dT%H:%M:%S-07:00" time_point = (datetime.datetime(year=1970, month=1, day=1) + \ datetime.timedelta(days=ncdf.variables["time"][index])).strftime(time_format) geo_file.write(','.join([ "Full Field - Environmental Logger", "(EL) %s" % stream, str(33.075576), str(-111.974304), time_point, host + ("" if host.endswith("/") else "/" ) + "datasets/" + resource['id'], '"%s"' % json.dumps(dp_obj). 
replace('"', '""'), timestamp ]) + '\n') except: self.log_error( resource, "NetCDF attribute not found: %s" % stream) # Fetch dataset ID by dataset name if not provided target_dsid = build_dataset_hierarchy_crawl( host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, None, None, self.sensors.get_display_name(), timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp) ds_files = get_file_list(connector, host, secret_key, target_dsid) found_full = False found_csv = False for f in ds_files: if f['filename'] == os.path.basename(out_fullday_netcdf): found_full = True if f['filename'] == os.path.basename(geo_csv): found_csv = True if not found_full: upload_to_dataset(connector, host, secret_key, target_dsid, out_fullday_netcdf) if not found_csv: geoid = upload_to_dataset(connector, host, secret_key, target_dsid, geo_csv) self.log_info(resource, "triggering geostreams extractor on %s" % geoid) submit_extraction(connector, host, secret_key, geoid, "terra.geostreams") # Tell Clowder this is completed so subsequent file updates don't daisy-chain ext_meta = build_metadata(host, self.extractor_info, resource['id'], {"output_dataset": target_dsid}, 'dataset') upload_metadata(connector, host, secret_key, resource['id'], ext_meta) self.end_message(resource)
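# The per-file ncrcat append above is assembled as a single shell string and run with
# shell=True. A minimal sketch of the same NCO invocation in argument-list form, which avoids
# shell quoting issues if a path ever contains spaces (illustrative only):
import subprocess

def append_record(single_nc, full_nc):
    """Append the records of single_nc onto the running full-day NetCDF file."""
    subprocess.check_call(["ncrcat", "--record_append", single_nc, full_nc])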
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) # Get BIN file and metadata bin_file, terra_md_full = None, None for f in resource['local_paths']: if f.endswith('_dataset_metadata.json'): all_dsmd = load_json_file(f) terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera') elif f.endswith('_ir.bin'): bin_file = f if None in [bin_file, terra_md_full]: raise ValueError("could not locate all files & metadata in processing") timestamp = resource['dataset_info']['name'].split(" - ")[1] # Fetch experiment name from terra metadata season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full) if None in [season_name, experiment_name]: raise ValueError("season and experiment could not be determined") # Determine output directory self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(), timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp)) target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, season_name, experiment_name, self.sensors.get_display_name(), timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp) tiff_path = self.sensors.create_sensor_path(timestamp) png_path = tiff_path.replace(".tif", ".png") uploaded_file_ids = [] # Attach LemnaTec source metadata to Level_1 product self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid) remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name']) terra_md_trim = get_terraref_metadata(all_dsmd) if updated_experiment is not None: terra_md_trim['experiment_metadata'] = updated_experiment terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id'] level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset') upload_metadata(connector, host, secret_key, target_dsid, level1_md) skipped_png = False if not file_exists(png_path) or self.overwrite: # Perform actual processing self.log_info(resource, "creating & uploading %s" % png_path) raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float') raw_data = numpy.rot90(raw_data, 3) create_image(raw_data, png_path, self.scale_values) self.created += 1 self.bytes += os.path.getsize(png_path) else: skipped_png = True # Only upload the newly generated file to Clowder if it isn't already in dataset found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite) if not found_in_dest or self.overwrite: fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) if not file_exists(tiff_path) or self.overwrite: # Generate temperature matrix and perform actual processing self.log_info(resource, "creating & uploading %s" % tiff_path) gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box']) if skipped_png: raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float') raw_data = numpy.rot90(raw_data, 3) tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full) self.created += 1 self.bytes += 
os.path.getsize(tiff_path) # Only upload the newly generated file to Clowder if it isn't already in dataset found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite) if not found_in_dest or self.overwrite: fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) # Trigger additional extractors self.log_info(resource, "triggering downstream extractors") submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif") # Tell Clowder this is completed so subsequent file updates don't daisy-chain if len(uploaded_file_ids) > 0: extractor_md = build_metadata(host, self.extractor_info, target_dsid, { "files_created": uploaded_file_ids }, 'dataset') self.log_info(resource, "uploading extractor metadata to raw dataset") remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name']) upload_metadata(connector, host, secret_key, resource['id'], extractor_md) self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() tmp_csv = "canopycovertraits.csv" csv_file = open(tmp_csv, 'w') (fields, traits) = ccCore.get_traits_table() csv_file.write(','.join(map(str, fields)) + '\n') # Get full list of experiment plots using date as filter logging.info(connector) logging.info(host) logging.info(secret_key) logging.info(resource) ds_info = get_info(connector, host, secret_key, resource['parent']['id']) timestamp = ds_info['name'].split(" - ")[1] all_plots = get_site_boundaries(timestamp, city='Maricopa') successful_plots = 0 for plotname in all_plots: bounds = all_plots[plotname] # Use GeoJSON string to clip full field to this plot try: (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds) if len(pxarray.shape) < 3: logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape)) continue ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5) ccVal *= 100.0 # Make 0-100 instead of 0-1 successful_plots += 1 if successful_plots % 10 == 0: logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots))) except: logging.error("error generating cc for %s" % plotname) continue traits['canopy_cover'] = str(ccVal) traits['site'] = plotname traits['local_datetime'] = timestamp + "T12:00:00" trait_list = ccCore.generate_traits_list(traits) csv_file.write(','.join(map(str, trait_list)) + '\n') # Prepare and submit datapoint centroid_lonlat = json.loads( centroid_from_geojson(bounds))["coordinates"] time_fmt = timestamp + "T12:00:00-07:00" dpmetadata = { "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'], "canopy_cover": ccVal } create_datapoint_with_dependencies( connector, host, secret_key, "Canopy Cover", (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt, dpmetadata, timestamp) # submit CSV to BETY csv_file.close() submit_traits(tmp_csv, betykey=self.bety_key) # Add metadata to original dataset indicating this was run ext_meta = build_metadata( host, self.extractor_info, resource['parent']['id'], { "plots_processed": successful_plots, "plots_skipped": len(all_plots) - successful_plots, "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover" }, 'dataset') upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta) self.end_message()
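# clip_raster returns pixels in GDAL band-first order (bands, rows, cols), while gen_cc_for_img
# appears to expect band-last order (rows, cols, bands); hence the rollaxis call above. A
# minimal illustration of that reordering:
import numpy

bands_first = numpy.zeros((3, 480, 640))        # (bands, rows, cols) as read via GDAL
bands_last = numpy.rollaxis(bands_first, 0, 3)  # (rows, cols, bands)
assert bands_last.shape == (480, 640, 3)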
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() # Get full list of experiment plots using date as filter ds_info = get_info(connector, host, secret_key, resource['parent']['id']) timestamp = ds_info['name'].split(" - ")[1] all_plots = get_site_boundaries(timestamp, city='Maricopa') successful_plots = 0 for plotname in all_plots: bounds = all_plots[plotname] # Use GeoJSON string to clip full field to this plot try: (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds) if len(pxarray.shape) < 3: logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape)) continue plot_img = create_image(pxarray, "plot_image.png") plot_csv = "plot.csv" self.generate_table_only(plot_img, plot_csv) trait_vals = self.extract_vals_from_csv(plot_csv) successful_plots += 1 if successful_plots % 10 == 0: logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots))) except: logging.error("error generating traits for %s" % plotname) continue # Create BETY-ready CSV (fields, traits) = self.get_traits_table() for tr in trait_vals: traits[tr] = str(trait_vals[tr]) traits['site'] = plotname traits['local_datetime'] = timestamp + "T12-00-00-000" trait_list = self.generate_traits_list(traits) self.generate_cc_csv(plot_csv, fields, trait_list) # submit CSV to BETY submit_traits(plot_csv, self.bety_key) # Prepare and submit datapoint centroid_lonlat = json.loads( centroid_from_geojson(bounds))["coordinates"] time_fmt = timestamp + "T12:00:00-07:00" dpmetadata = { "source": host + "files/" + resource['id'], } for tr in trait_vals: dpmetadata[tr] = str(trait_vals[tr]) create_datapoint_with_dependencies( connector, host, secret_key, "Canopy Cover", (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt, dpmetadata, timestamp) os.remove(plot_img) os.remove(plot_csv) # Add metadata to original dataset indicating this was run ext_meta = build_metadata( host, self.extractor_info, resource['parent']['id'], { "plots_processed": successful_plots, "plots_skipped": len(all_plots) - successful_plots # TODO: add link to BETY trait IDs }, 'dataset') upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta) self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters): """Performs plot level image extraction Args: connector(obj): the message queue connector instance host(str): the URI of the host making the connection secret_key(str): used with the host API resource(dict): dictionary containing the resources associated with the request parameters(json): json object of the triggering message contents """ self.start_message(resource) super(CanopyCover, self).process_message(connector, host, secret_key, resource, parameters) # Initialize local variables dataset_name = resource["name"] experiment_name = "Unknown Experiment" datestamp = None citation_auth_override, citation_title_override, citation_year_override = None, None, None config_specie = None store_in_geostreams = True store_in_betydb = True create_csv_files = True out_geo = None out_csv = None # Find the files we're interested in imagefiles = self.find_image_files(resource['local_paths'], resource) num_image_files = len(imagefiles) if num_image_files <= 0: self.log_skip(resource, "No image files with geographic boundaries found") return # Setup overrides and get the restore function restore_fn = self.setup_overrides(host, secret_key, resource) if not restore_fn: self.end_message(resource) return try: # Get the best timestamp timestamp = terraref_timestamp_to_iso( self.find_timestamp(resource['dataset_info']['name'])) if 'T' in timestamp: datestamp = timestamp.split('T')[0] else: datestamp = timestamp timestamp += 'T12:00:00' if timestamp.find('T') > 0 and timestamp.rfind( '-') > 0 and timestamp.find('T') < timestamp.rfind('-'): # Convert to local time. We can do this due to site definitions having # the time offsets as part of their definition localtime = timestamp[0:timestamp.rfind('-')] else: localtime = timestamp _, experiment_name, _ = self.get_season_and_experiment( timestamp_to_terraref(timestamp), self.sensor_name) # Build up a list of image IDs image_ids = {} if 'files' in resource: for one_image in imagefiles: image_name = os.path.basename(one_image) for res_file in resource['files']: if ('filename' in res_file) and ('id' in res_file) and \ (image_name == res_file['filename']): image_ids[image_name] = res_file['id'] file_filters = self.get_file_filters() if self.experiment_metadata: extractor_json = self.find_extractor_json() if extractor_json: if 'citationAuthor' in extractor_json: citation_auth_override = extractor_json[ 'citationAuthor'] if 'citationYear' in extractor_json: citation_year_override = extractor_json['citationYear'] if 'citationTitle' in extractor_json: citation_title_override = extractor_json[ 'citationTitle'] if 'noGeostreams' in extractor_json: store_in_geostreams = False if 'noBETYdb' in extractor_json: store_in_betydb = False if 'noCSV' in extractor_json: create_csv_files = False if 'germplasmName' in self.experiment_metadata: config_specie = self.experiment_metadata['germplasmName'] # Setup for the extracted plot canopy cover sensor_name = "canopybyplot" # Create the output files rootdir = self.sensors.create_sensor_path( timestamp_to_terraref(timestamp), sensor=sensor_name, ext=".csv", opts=[experiment_name]) (fields, traits) = get_traits_table() if create_csv_files: out_geo = os.path.splitext(rootdir)[0] + "_canopycover_geo.csv" self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo) out_csv = os.path.splitext(rootdir)[0] + "_canopycover.csv" self.log_info(resource, "Writing Shapefile CSV to %s" % out_csv) # Setup default trait values if not config_specie is None: 
traits['species'] = config_specie if not citation_auth_override is None: traits['citation_author'] = citation_auth_override if not citation_title_override is None: traits['citation_title'] = citation_title_override if not citation_year_override is None: traits['citation_year'] = citation_year_override else: traits['citation_year'] = datestamp[:4] bety_csv_header = ','.join(map(str, fields)) geo_csv_header = ','.join([ 'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp' ]) # Loop through all the images (of which there should be one - see above) geo_rows = [] bety_rows = [] for filename in imagefiles: # Check if we're filtering files if file_filters: if not file_filtered_in(filename, file_filters): continue try: cc_val = "" # Load the pixels clip_pix = np.array(gdal.Open(filename).ReadAsArray()) # Get additional, necessary data centroid = imagefiles[filename]["bounds"].Centroid() plot_name = _get_plot_name( [resource['dataset_info']['name'], dataset_name]) cc_val = calculate_canopycover_masked( np.rollaxis(clip_pix, 0, 3)) # Prepare the data for writing image_clowder_id = "" image_name = os.path.basename(filename) if image_name in image_ids: image_clowder_id = image_ids[image_name] # Write the datapoint geographically and otherwise csv_data = ','.join([ plot_name, 'Canopy Cover', str(centroid.GetY()), str(centroid.GetX()), localtime, host.rstrip('/') + '/files/' + image_clowder_id, str(cc_val), datestamp ]) if out_geo: self.write_csv_file(resource, out_geo, geo_csv_header, csv_data) if store_in_geostreams: geo_rows.append(csv_data) traits['canopy_cover'] = str(cc_val) traits['site'] = plot_name traits['local_datetime'] = localtime trait_list = generate_traits_list(traits) csv_data = ','.join(map(str, trait_list)) if out_csv: self.write_csv_file(resource, out_csv, bety_csv_header, csv_data) if store_in_betydb: bety_rows.append(csv_data) except Exception as ex: self.log_error( resource, "Error generating canopy cover for %s" % plot_name) self.log_error(resource, " exception: %s" % str(ex)) continue # Only process the first file that's valid if num_image_files > 1: self.log_info( resource, "Multiple image files were found, only using first found" ) break # Upload any geostreams or betydb data if store_in_geostreams: if geo_rows: try: update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows) except Exception as ex: self.log_error( resource, "Exception caught while updating geostreams: " + str(ex)) else: self.log_info( resource, "No geostreams data was generated to upload") if store_in_betydb: if bety_rows: try: update_betydb(bety_csv_header, bety_rows) except Exception as ex: self.log_error( resource, "Exception caught while updating betydb: " + str(ex)) else: self.log_info(resource, "No BETYdb data was generated to upload") # Update this dataset with the extractor info dataset_id = self.get_dataset_id(host, secret_key, resource, dataset_name) try: # Tell Clowder this is completed so subsequent file updates don't daisy-chain self.log_info(resource, "updating dataset metadata") content = { "comment": "Calculated greenness index", "greenness value": cc_val } if self.experiment_metadata: content.update( prepare_pipeline_metadata(self.experiment_metadata)) extractor_md = build_metadata(host, self.extractor_info, dataset_id, content, 'dataset') clowder_dataset.remove_metadata(connector, host, secret_key, dataset_id, self.extractor_info['name']) clowder_dataset.upload_metadata(connector, host, secret_key, dataset_id, extractor_md) except Exception as ex: 
self.log_error( resource, "Exception updating dataset metadata: " + str(ex)) finally: # Signal end of processing message and restore changed variables. Be sure to restore # changed variables above with early returns if restore_fn: restore_fn() self.end_message(resource)
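# calculate_canopycover_masked is imported from the pipeline; the sketch below is only an
# assumption of the general idea (share of unmasked plant pixels in the plot clip), not the
# actual implementation:
import numpy

def canopy_cover_percent(masked_pixels):
    """Percent of pixels whose last band (assumed to be the plant mask) is non-zero."""
    mask = masked_pixels[:, :, -1]
    return 100.0 * numpy.count_nonzero(mask) / mask.size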
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() stream_name = 'Energy Farm Observations' disp_name = self.sensors.get_display_name() if 'Weather CEN' in resource['name']: curr_sens = disp_name + ' - CEN' stream_name += ' CEN' main_coords = [-88.199801, 40.062051, 0] elif 'WeatherNE' in resource['name']: curr_sens = disp_name + ' - NE' stream_name += ' NE' main_coords = [-88.193298, 40.067379, 0] elif 'WeatherSE' in resource['name']: curr_sens = disp_name + ' - SE' stream_name += ' SE' main_coords = [-88.193573, 40.056910, 0] geom = {"type": "Point", "coordinates": main_coords} # Get sensor or create if not found sensor_data = get_sensor_by_name(connector, host, secret_key, curr_sens) if not sensor_data: sensor_id = create_sensor(connector, host, secret_key, curr_sens, geom, { "id": "Met Station", "title": "Met Station", "sensorType": 4 }, "Urbana") else: sensor_id = sensor_data['id'] # Get stream or create if not found stream_data = get_stream_by_name(connector, host, secret_key, stream_name) if not stream_data: stream_id = create_stream(connector, host, secret_key, stream_name, sensor_id, geom) else: stream_id = stream_data['id'] # Get metadata to check till what time the file was processed last. Start processing the file after this time allmd = download_metadata(connector, host, secret_key, resource['id']) last_processed_time = 0 datapoint_count = 0 for md in allmd: if 'content' in md and 'last processed time' in md['content']: last_processed_time = md['content']['last processed time'] if 'datapoints_created' in md['content']: datapoint_count = md['content']['datapoints_created'] else: datapoint_count = 0 delete_metadata(connector, host, secret_key, resource['id'], md['agent']['name'].split("/")[-1]) # Parse file and get all the records in it. ISO_8601_UTC_OFFSET = dateutil.tz.tzoffset("-07:00", -7 * 60 * 60) records = parse_file(resource["local_paths"][0], last_processed_time, utc_offset=ISO_8601_UTC_OFFSET) # Add props to each record. for record in records: record['properties']['source_file'] = resource['id'] record['stream_id'] = str(stream_id) total_dp = 0 datapoint_list = [] for record in records: datapoint_list.append({ "start_time": record['start_time'], "end_time": record['end_time'], "type": "Point", "geometry": record['geometry'], "properties": record['properties'] }) if len(datapoint_list) > self.batchsize: create_datapoints(connector, host, secret_key, stream_id, datapoint_list) total_dp += len(datapoint_list) datapoint_list = [] if len(datapoint_list) > 0: create_datapoints(connector, host, secret_key, stream_id, datapoint_list) total_dp += len(datapoint_list) # Mark dataset as processed metadata = build_metadata( host, self.extractor_info, resource['id'], { "last processed time": records[-1]["end_time"], "datapoints_created": datapoint_count + total_dp }, 'file') upload_metadata(connector, host, secret_key, resource['id'], metadata) self.end_message()
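# The datapoint upload above flushes whenever the buffer grows past self.batchsize and once
# more at the end. The same pattern written as a generic batching helper (illustrative sketch
# only):
def batched(items, size):
    """Yield successive slices of at most `size` items."""
    for start in range(0, len(items), size):
        yield items[start:start + size]

# for chunk in batched(datapoint_list, self.batchsize):
#     create_datapoints(connector, host, secret_key, stream_id, chunk)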
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) # rulechecker provided some key information for us in parameters if type(parameters) is str: parameters = json.loads(parameters) if 'parameters' in parameters: parameters = parameters['parameters'] if type(parameters) is unicode: parameters = json.loads(str(parameters)) dataset_name = parameters["output_dataset"] scan_name = parameters["scan_type"] if "scan_type" in parameters else "" timestamp = dataset_name.split(" - ")[1] # Input path will suggest which sensor we are seeing sensor_name, sensor_lookup = None, None for f in resource['files']: if f['filepath'].find("rgb_geotiff") > -1: sensor_name = "stereoTop" sensor_lookup = "rgb_fullfield" elif f['filepath'].find("ir_geotiff") > -1: sensor_name = "flirIrCamera" sensor_lookup = "ir_fullfield" elif f['filepath'].find("laser3d_heightmap") > -1: sensor_name = "scanner3DTop" sensor_lookup = "laser3d_fullfield" if sensor_lookup is not None: break # Fetch experiment name from terra metadata season_name, experiment_name, updated_experiment = get_season_and_experiment( timestamp, sensor_name, {}) if None in [season_name, experiment_name]: raise ValueError("season and experiment could not be determined") # Determine output file paths out_tif_full = self.sensors.create_sensor_path( timestamp, sensor=sensor_lookup, opts=[scan_name]).replace(" ", "_") out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif") out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif") out_png = out_tif_full.replace(".tif", ".png") out_vrt = out_tif_full.replace(".tif", ".vrt") out_dir = os.path.dirname(out_vrt) # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating # If outputs already exist, we don't need to do anything else found_all = True if self.thumb: output_files = [out_vrt, out_tif_thumb] else: output_files = [out_tif_full, out_tif_medium, out_png] for output_file in output_files: if not file_exists(output_file): found_all = False break if found_all and not self.overwrite: if self.thumb: self.log_info( resource, "thumb output already exists; triggering terra.geotiff.fieldmosaic_full" ) r = requests.post( "%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key), headers={"Content-Type": "application/json"}, data=json.dumps({ "extractor": 'terra.geotiff.fieldmosaic_full', "parameters": parameters })) r.raise_for_status() else: self.log_skip(resource, "all outputs already exist") return # Perform actual field stitching if not self.darker or sensor_lookup != 'rgb_fullfield': (nu_created, nu_bytes) = self.generateSingleMosaic( connector, host, secret_key, out_dir, out_vrt, out_tif_thumb, out_tif_full, out_tif_medium, parameters, resource) else: (nu_created, nu_bytes) = self.generateDarkerMosaic( connector, host, secret_key, out_dir, out_vrt, out_tif_thumb, out_tif_full, out_tif_medium, parameters, resource) self.created += nu_created self.bytes += nu_bytes if not self.thumb and os.path.isfile(out_tif_medium): # Create PNG thumbnail self.log_info(resource, "Converting 10pct to %s..." 
% out_png) cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png) subprocess.call(cmd, shell=True) self.created += 1 self.bytes += os.path.getsize(out_png) self.log_info( resource, "Hierarchy: %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(sensor=sensor_lookup), timestamp[:4], timestamp[5:7])) # Get dataset ID or create it, creating parent collections as needed target_dsid = build_dataset_hierarchy_crawl( host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, season_name, experiment_name, self.sensors.get_display_name(sensor=sensor_lookup), timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name) # Upload full field image to Clowder content = { "comment": "This stitched image is computed based on an assumption that the scene is planar. \ There are likely to be be small offsets near the boundary of two images anytime there are plants \ at the boundary (because those plants are higher than the ground plane), or where the dirt is \ slightly higher or lower than average.", "file_ids": parameters["file_paths"] } # If we newly created these files, upload to Clowder if self.thumb: generated_files = [out_tif_thumb] else: generated_files = [out_tif_medium, out_tif_full, out_png] for checked_file in generated_files: if os.path.isfile(checked_file): found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, checked_file) #, replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")]) if not found_in_dest: id = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, checked_file) meta = build_metadata(host, self.extractor_info, id, content, 'file') upload_metadata(connector, host, secret_key, id, meta) if checked_file == out_tif_full: # Trigger downstream extractions on full resolution if sensor_lookup == 'ir_fullfield': submit_extraction(connector, host, secret_key, id, "terra.multispectral.meantemp") elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith( "_mask.tif"): submit_extraction(connector, host, secret_key, id, "terra.stereo-rgb.canopycover") if self.thumb: # TODO: Add parameters support to pyclowder submit_extraction() self.log_info(resource, "triggering terra.geotiff.fieldmosaic_full") r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key), headers={"Content-Type": "application/json"}, data=json.dumps({ "extractor": 'terra.geotiff.fieldmosaic_full', "parameters": parameters })) r.raise_for_status() self.end_message(resource)
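# As with the ncrcat call earlier, the gdal_translate PNG conversion can be issued with an
# argument list instead of a shell string; a minimal sketch of the same conversion:
import subprocess

def tif_to_png(tif_path, png_path):
    """Render a GeoTIFF as a PNG thumbnail with gdal_translate."""
    subprocess.check_call(["gdal_translate", "-of", "PNG", tif_path, png_path])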
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) if type(parameters) is str: parameters = json.loads(parameters) if 'parameters' in parameters: parameters = parameters['parameters'] if type(parameters) is unicode: parameters = json.loads(str(parameters)) # Input path will suggest which sensor we are seeing sensor_type = None for f in resource['files']: filepath = f['filepath'] for sens in ["rgb_geotiff", "ir_geotiff", "laser3d_heightmap"]: if filepath.find(sens) > -1: sensor_type = sens.split("_")[0] break if sensor_type is not None: break # dataset_name = "Full Field - 2017-01-01" dataset_name = parameters["output_dataset"] scan_name = parameters["scan_type"] if "scan_type" in parameters else "" timestamp = dataset_name.split(" - ")[1] out_tif_full = self.sensors.create_sensor_path( timestamp, opts=[sensor_type, scan_name]) out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif") out_vrt = out_tif_full.replace(".tif", ".vrt") out_dir = os.path.dirname(out_vrt) if os.path.exists(out_vrt) and not self.overwrite: self.log_skip(resource, "%s already exists; ending process" % out_vrt) return if not self.darker or sensor_type != 'rgb': (nu_created, nu_bytes) = self.generateSingleMosaic( connector, host, secret_key, sensor_type, out_dir, out_vrt, out_tif_thumb, out_tif_full, parameters, resource) else: (nu_created, nu_bytes) = self.generateDarkerMosaic( connector, host, secret_key, sensor_type, out_dir, out_vrt, out_tif_thumb, out_tif_full, parameters, resource) self.created += nu_created self.bytes += nu_bytes # Get dataset ID or create it, creating parent collections as needed target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, self.sensors.get_display_name(), timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name) # Upload full field image to Clowder content = { "comment": "This stitched image is computed based on an assumption that the scene is planar. \ There are likely to be be small offsets near the boundary of two images anytime there are plants \ at the boundary (because those plants are higher than the ground plane), or where the dirt is \ slightly higher or lower than average.", "file_ids": parameters["file_paths"] } if os.path.exists(out_tif_thumb): thumbid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_tif_thumb) thumbmeta = build_metadata(host, self.extractor_info, thumbid, content, 'file') upload_metadata(connector, host, secret_key, thumbid, thumbmeta) if os.path.exists(out_tif_full): fullid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_tif_full) fullmeta = build_metadata(host, self.extractor_info, fullid, content, 'file') upload_metadata(connector, host, secret_key, fullid, fullmeta) self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message(resource) # Load metadata from dataset for fname in resource['local_paths']: if fname.endswith('_dataset_metadata.json'): all_dsmd = load_json_file(fname) terra_md_full = get_terraref_metadata(all_dsmd) if 'spatial_metadata' in terra_md_full: spatial_meta = terra_md_full['spatial_metadata'] else: spatial_meta = None if not spatial_meta: raise ValueError("No spatial metadata found.") # Determine which files in dataset need clipping files_to_process = {} for f in resource['local_paths']: if f.startswith("ir_geotiff") and f.endswith(".tif"): sensor_name = "ir_geotiff" filename = os.path.basename(f) files_to_process[filename] = { "path": f, "bounds": spatial_meta['flirIrCamera']['bounding_box'] } elif f.startswith("rgb_geotiff") and f.endswith(".tif"): sensor_name = "rgb_geotiff" filename = os.path.basename(f) if f.endswith("_left.tif"): side = "left" else: side = "right" files_to_process[filename] = { "path": f, "bounds": spatial_meta[side]['bounding_box'] } elif f.endswith(".las"): sensor_name = "laser3d_las" filename = os.path.basename(f) files_to_process[filename] = { "path": f, "bounds": get_las_extents(f) } # TODO: Add case for laser3d heightmap # Fetch experiment name from terra metadata timestamp = resource['dataset_info']['name'].split(" - ")[1] season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full) if None in [season_name, experiment_name]: raise ValueError("season and experiment could not be determined") # Determine script name target_scan = "unknown_scan" if 'gantry_variable_metadata' in terra_md_full: if 'script_name' in terra_md_full['gantry_variable_metadata']: target_scan = terra_md_full['gantry_variable_metadata']['script_name'] if 'script_hash' in terra_md_full['gantry_variable_metadata']: target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash'] all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa') uploaded_file_ids = [] for filename in files_to_process: file_path = files_to_process[filename]["path"] file_bounds = files_to_process[filename]["bounds"] overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True) if len(overlap_plots) > 0: self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots))) for plotname in overlap_plots: plot_bounds = overlap_plots[plotname] tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds)) plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)" leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0] self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name, timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset)) target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace, season_name, experiment_name, plot_display_name, timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset) out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename) if not os.path.exists(os.path.dirname(out_file)): os.makedirs(os.path.dirname(out_file)) if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite): """If file is a geoTIFF, simply clip it and upload it to Clowder""" clip_raster(file_path, tuples, out_path=out_file) found_in_dest = 
check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite) if not found_in_dest or self.overwrite: fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) self.created += 1 self.bytes += os.path.getsize(out_file) elif filename.endswith(".las"): """If file is LAS, we can merge with any existing scan+plot output safely""" merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las") merged_txt = merged_out.replace(".las", "_contents.txt") already_merged = False if os.path.exists(merged_txt): # Check if contents with open(merged_txt, 'r') as contents: for entry in contents.readlines(): if entry.strip() == file_path: already_merged = True break if not already_merged: clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out) with open(merged_txt, 'a') as contents: contents.write(file_path+"\n") # Upload the individual plot shards for optimizing las2height later found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite) if not found_in_dest or self.overwrite: fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) self.created += 1 self.bytes += os.path.getsize(out_file) # Upload the merged result if necessary found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite) if not found_in_dest or self.overwrite: fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out) uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid) self.created += 1 self.bytes += os.path.getsize(merged_out) # Trigger las2height extractor submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height") # Tell Clowder this is completed so subsequent file updates don't daisy-chain extractor_md = build_metadata(host, self.extractor_info, resource['id'], { "files_created": uploaded_file_ids }, 'dataset') self.log_info(resource, "uploading extractor metadata to raw dataset") remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name']) upload_metadata(connector, host, secret_key, resource['id'], extractor_md) self.end_message(resource)
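# The *_contents.txt manifest above keeps the same LAS shard from being merged into the
# plot-level file twice. The check/record steps, isolated as hypothetical helpers with the
# same logic:
import os

def already_merged(contents_txt, source_path):
    """True if source_path is already listed in the merge manifest."""
    if not os.path.exists(contents_txt):
        return False
    with open(contents_txt, 'r') as contents:
        return any(line.strip() == source_path for line in contents)

def record_merge(contents_txt, source_path):
    """Append source_path to the merge manifest."""
    with open(contents_txt, 'a') as contents:
        contents.write(source_path + "\n")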
def process_message(self, connector, host, secret_key, resource, parameters): self.start_message() tmp_csv = "meantemptraits.csv" csv_file = open(tmp_csv, 'w') (fields, traits) = get_traits_table() csv_file.write(','.join(map(str, fields)) + '\n') # Get full list of experiment plots using date as filter ds_info = get_info(connector, host, secret_key, resource['parent']['id']) dsmd = download_metadata(connector, host, secret_key, resource['parent']['id']) timestamp = ds_info['name'].split(" - ")[1] all_plots = get_site_boundaries(timestamp, city='Maricopa') successful_plots = 0 for plotname in all_plots: bounds = all_plots[plotname] # Use GeoJSON string to clip full field to this plot (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds) #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature # Filter out any pxarray[pxarray < 0] = numpy.nan mean_tc = numpy.nanmean(pxarray) - 273.15 # Create BETY-ready CSV if not numpy.isnan(mean_tc): traits['surface_temperature'] = str(mean_tc) traits['site'] = plotname traits['local_datetime'] = timestamp + "T12:00:00" trait_list = generate_traits_list(traits) #generate_csv(tmp_csv, fields, trait_list) csv_file.write(','.join(map(str, trait_list)) + '\n') # Prepare and submit datapoint centroid_lonlat = json.loads( centroid_from_geojson(bounds))["coordinates"] time_fmt = timestamp + "T12:00:00-07:00" dpmetadata = { "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'], "surface_temperature": str(mean_tc) } create_datapoint_with_dependencies( connector, host, secret_key, "IR Surface Temperature", (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt, dpmetadata, timestamp) successful_plots += 1 # submit CSV to BETY csv_file.close() submit_traits(tmp_csv, betykey=self.bety_key) # Tell Clowder this is completed so subsequent file updates don't daisy-chain metadata = build_metadata( host, self.extractor_info, resource['parent']['id'], { "plots_processed": successful_plots, "plots_skipped": len(all_plots) - successful_plots, "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature" }, 'dataset') upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata) self.end_message()
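# The plot temperature above treats negative raw values as no-data and converts Kelvin to
# Celsius. The same computation isolated as a small helper (a sketch of the logic used above):
import numpy

def mean_plot_temperature_c(pxarray):
    """Mean surface temperature in Celsius, ignoring no-data (negative) pixels."""
    px = pxarray.astype('float')
    px[px < 0] = numpy.nan
    return numpy.nanmean(px) - 273.15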