def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                             self.sensors.get_display_name(),
                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
            if 'spatial_metadata' in terra_md_full:
                spatial_meta = terra_md_full['spatial_metadata']
            else:
                spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }
        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            if f.endswith("_left.tif"):
                side = "left"
            else:
                side = "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }
        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }
        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
            if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)
        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                         timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                            season_name, experiment_name, plot_display_name,
                                                            timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                            leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    # If file is a geoTIFF, simply clip it and upload it to Clowder
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    # If file is LAS, we can merge with any existing scan+plot output safely
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check contents to see whether this file was already merged
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
        "files_created": uploaded_file_ids
    }, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key, resource, parameters)

    self.start_message(resource)

    # Get left/right files and metadata
    process_files = []
    if self.get_terraref_metadata is not None:
        process_files = find_terraref_files(resource)
    else:
        process_files = find_image_files(self.args.identify_binary, resource, self.file_infodata_file_ending)

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    # Prepare for processing files
    timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
    target_dsid = resource['id']
    uploaded_file_ids = []
    ratios = []

    try:
        for one_file in process_files:
            mask_source = one_file

            # Make sure the source image is in the correct EPSG space
            epsg = get_epsg(one_file)
            if epsg != self.default_epsg:
                self.log_info(resource, "Reprojecting from " + str(epsg) + " to default " + str(self.default_epsg))
                _, tmp_name = tempfile.mkstemp()
                src = gdal.Open(one_file)
                gdal.Warp(tmp_name, src, dstSRS='EPSG:' + str(self.default_epsg))
                mask_source = tmp_name

            # Get the bounds of the image to see if we can process it. Also get the mask filename
            rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

            if bounds is None:
                self.log_skip(resource, "Skipping non-georeferenced image: " + os.path.basename(one_file))
                if mask_source != one_file:
                    os.remove(mask_source)
                continue

            if not file_exists(rgb_mask_tif) or self.overwrite:
                self.log_info(resource, "creating %s" % rgb_mask_tif)

                mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                ratios.append(mask_ratio)

                # Bands must be reordered to avoid swapping R and B
                mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info, self.get_terraref_metadata)
                compress_geotiff(rgb_mask_tif)

                # Remove any temporary file
                if mask_source != one_file:
                    os.remove(mask_source)

                self.created += 1
                self.bytes += os.path.getsize(rgb_mask_tif)

            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, rgb_mask_tif,
                                                  remove=self.overwrite)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % rgb_mask_tif)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, rgb_mask_tif)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if self.get_terraref_metadata is not None:
            ratios_len = len(ratios)
            left_ratio = (ratios[0] if ratios_len > 0 else None)
            right_ratio = (ratios[1] if ratios_len > 1 else None)

            md = {"files_created": uploaded_file_ids}
            if left_ratio is not None:
                md["left_mask_ratio"] = left_ratio
            if not self.leftonly and right_ratio is not None:
                md["right_mask_ratio"] = right_ratio

            extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
            self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if sensor_old_base is not None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # rulechecker provided some key information for us in parameters
    if type(parameters) is str:
        parameters = json.loads(parameters)
    if 'parameters' in parameters:
        parameters = parameters['parameters']
    if type(parameters) is unicode:
        parameters = json.loads(str(parameters))
    dataset_name = parameters["output_dataset"]
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

    timestamp = dataset_name.split(" - ")[1]

    # Input path will suggest which sensor we are seeing
    sensor_name, sensor_lookup = None, None
    for f in resource['files']:
        if f['filepath'].find("rgb_geotiff") > -1:
            sensor_name = "stereoTop"
            sensor_lookup = "rgb_fullfield"
        elif f['filepath'].find("ir_geotiff") > -1:
            sensor_name = "flirIrCamera"
            sensor_lookup = "ir_fullfield"
        elif f['filepath'].find("laser3d_heightmap") > -1:
            sensor_name = "scanner3DTop"
            sensor_lookup = "laser3d_fullfield"
        if sensor_lookup is not None:
            break

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, sensor_name, {})
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output file paths
    out_tif_full = self.sensors.create_sensor_path(timestamp, sensor=sensor_lookup, opts=[scan_name]).replace(" ", "_")
    out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
    out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
    out_png = out_tif_full.replace(".tif", ".png")
    out_vrt = out_tif_full.replace(".tif", ".vrt")
    out_dir = os.path.dirname(out_vrt)

    # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

    # If outputs already exist, we don't need to do anything else
    found_all = True
    if self.thumb:
        output_files = [out_vrt, out_tif_thumb]
    else:
        output_files = [out_tif_full, out_tif_medium, out_png]
    for output_file in output_files:
        if not file_exists(output_file):
            found_all = False
            break
    if found_all and not self.overwrite:
        if self.thumb:
            self.log_info(resource, "thumb output already exists; triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                               "parameters": parameters}))
            r.raise_for_status()
        else:
            self.log_skip(resource, "all outputs already exist")
        return

    # Perform actual field stitching
    if not self.darker or sensor_lookup != 'rgb_fullfield':
        (nu_created, nu_bytes) = self.generateSingleMosaic(connector, host, secret_key, out_dir, out_vrt,
                                                           out_tif_thumb, out_tif_full, out_tif_medium,
                                                           parameters, resource)
    else:
        (nu_created, nu_bytes) = self.generateDarkerMosaic(connector, host, secret_key, out_dir, out_vrt,
                                                           out_tif_thumb, out_tif_full, out_tif_medium,
                                                           parameters, resource)
    self.created += nu_created
    self.bytes += nu_bytes

    if not self.thumb and os.path.isfile(out_tif_medium):
        # Create PNG thumbnail
        self.log_info(resource, "Converting 10pct to %s..." % out_png)
        cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
        subprocess.call(cmd, shell=True)
        self.created += 1
        self.bytes += os.path.getsize(out_png)

    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                   self.sensors.get_display_name(sensor=sensor_lookup),
                                                                   timestamp[:4], timestamp[5:7]))

    # Get dataset ID or create it, creating parent collections as needed
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name,
                                                self.sensors.get_display_name(sensor=sensor_lookup),
                                                timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name)

    # Upload full field image to Clowder
    content = {
        "comment": "This stitched image is computed based on an assumption that the scene is planar. "
                   "There are likely to be small offsets near the boundary of two images anytime there are plants "
                   "at the boundary (because those plants are higher than the ground plane), or where the dirt is "
                   "slightly higher or lower than average.",
        "file_ids": parameters["file_paths"]
    }

    # If we newly created these files, upload to Clowder
    if self.thumb:
        generated_files = [out_tif_thumb]
    else:
        generated_files = [out_tif_medium, out_tif_full, out_png]
    for checked_file in generated_files:
        if os.path.isfile(checked_file):
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, checked_file)
            # , replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
            if not found_in_dest:
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, checked_file)
                meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                upload_metadata(connector, host, secret_key, fileid, meta)

                if checked_file == out_tif_full:
                    # Trigger downstream extractions on full resolution
                    if sensor_lookup == 'ir_fullfield':
                        submit_extraction(connector, host, secret_key, fileid, "terra.multispectral.meantemp")
                    elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith("_mask.tif"):
                        submit_extraction(connector, host, secret_key, fileid, "terra.stereo-rgb.canopycover")

    if self.thumb:
        # TODO: Add parameters support to pyclowder submit_extraction()
        self.log_info(resource, "triggering terra.geotiff.fieldmosaic_full")
        r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                          headers={"Content-Type": "application/json"},
                          data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                           "parameters": parameters}))
        r.raise_for_status()

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get BIN file and metadata
    bin_file, terra_md_full = None, None
    for f in resource['local_paths']:
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                             self.sensors.get_display_name(),
                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    tiff_path = self.sensors.create_sensor_path(timestamp)
    png_path = tiff_path.replace(".tif", ".png")
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product
    self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    terra_md_trim = get_terraref_metadata(all_dsmd)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    skipped_png = False
    if not file_exists(png_path) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating & uploading %s" % png_path)
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(tiff_path) or self.overwrite:
        # Generate temperature matrix and perform actual processing
        self.log_info(resource, "creating & uploading %s" % tiff_path)
        gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, terra_md_full)  # get temperature
        create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    self.start_message(resource)
    super(ClipByShape, self).process_message(connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Initialize local variables
    dataset_name = parameters["datasetname"]
    season_name, experiment_name = "Unknown Season", "Unknown Experiment"
    datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

    # Array containing the links to uploaded files
    uploaded_file_ids = []

    # Find the files we're interested in
    # pylint: disable=line-too-long
    (shapefile, shxfile, dbffile, imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                                            resource['triggering_file'])
    # pylint: enable=line-too-long
    if shapefile is None:
        self.log_skip(resource, "No shapefile found")
        return
    if shxfile is None:
        self.log_skip(resource, "No SHX file found")
        return
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    try:
        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        # Get timestamps. Also get season and experiment information for Clowder collections
        datestamp = self.find_datestamp(dataset_name)
        timestamp = timestamp_to_terraref(self.find_timestamp(dataset_name))
        (season_name, experiment_name, _) = self.get_season_and_experiment(datestamp, self.sensor_name)

        if self.experiment_metadata:
            if 'extractors' in self.experiment_metadata:
                extractor_json = self.experiment_metadata['extractors']
                if 'shapefile' in extractor_json:
                    if 'plot_column_name' in extractor_json['shapefile']:
                        plot_name_idx = extractor_json['shapefile']['plot_column_name']

        # Check our current local variables
        if dbffile is None:
            self.log_info(resource, "DBF file not found, using default plot naming")
        self.log_info(resource, "Extracting plots using shapefile '" + os.path.basename(shapefile) + "'")

        # Load the shapes and find the plot name column if we have a DBF file
        shape_in = ogr.Open(shapefile)
        layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
        feature = layer.GetNextFeature()
        layer_ref = layer.GetSpatialRef()

        if dbffile:
            shape_table = DBF(dbffile, lowernames=True, ignore_missing_memofile=True)
            shape_rows = iter(list(shape_table))

            # Make sure if we have the column name of plot-names specified that it exists in
            # the shapefile
            column_names = shape_table.field_names
            if plot_name_idx is not None:
                if not find_all_plot_names(plot_name_idx, column_names):
                    raise ValueError(resource, "Shapefile data does not have specified plot name" +
                                     " column '" + plot_name_idx + "'")

            # Lookup a plot name field to use
            if plot_name_idx is None:
                for one_name in column_names:
                    # pylint: disable=line-too-long
                    if one_name == "observationUnitName":
                        plot_name_idx = one_name
                        break
                    elif (one_name.find('plot') >= 0) and ((one_name.find('name') >= 0) or (one_name.find('id') >= 0)):
                        plot_name_idx = one_name
                        break
                    elif one_name == 'id':
                        plot_name_idx = one_name
                        break
                    # pylint: enable=line-too-long
            if plot_name_idx is None:
                raise ValueError(resource, "Shapefile data does not have a plot name field '" +
                                 os.path.basename(dbffile) + "'")

        # Setup for the extracted plot images
        plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + " (By Plot)"

        # Loop through each polygon and extract plot level data
        alternate_plot_id = 0
        while feature:

            # Current geometry to extract
            plot_poly = feature.GetGeometryRef()
            if layer_ref:
                plot_poly.AssignSpatialReference(layer_ref)
            plot_spatial_ref = plot_poly.GetSpatialReference()

            # Determine the plot name to use
            plot_name = None
            alternate_plot_id = alternate_plot_id + 1
            if shape_rows and plot_name_idx:
                try:
                    row = next(shape_rows)
                    plot_name = get_plot_name(plot_name_idx, row)
                except StopIteration:
                    pass
            if not plot_name:
                plot_name = "plot_" + str(alternate_plot_id)

            # Determine output dataset name
            leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
            self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                          (season_name, experiment_name, plot_display_name,
                           datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_dataset))

            # Create the dataset, even if we have no data to put in it, so that the caller knows
            # it was addressed
            target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                        self.clowderspace, season_name, experiment_name,
                                                        plot_display_name, datestamp[:4], datestamp[5:7],
                                                        datestamp[8:10], leaf_ds_name=leaf_dataset)

            # Loop through all the images looking for overlap
            for filename in imagefiles:

                # Get the bounds. We also get the reference systems in case we need to convert
                # between them
                bounds = imagefiles[filename]['bounds']
                bounds_spatial_ref = bounds.GetSpatialReference()

                # Checking for geographic overlap and skip if there is none
                if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                    # We need to convert coordinate system before an intersection
                    transform = osr.CoordinateTransformation(bounds_spatial_ref, plot_spatial_ref)
                    new_bounds = bounds.Clone()
                    if new_bounds:
                        new_bounds.Transform(transform)
                        intersection = plot_poly.Intersection(new_bounds)
                        new_bounds = None
                else:
                    # Same coordinate system. Simple intersection
                    intersection = plot_poly.Intersection(bounds)

                if intersection.GetArea() == 0.0:
                    self.log_info(resource, "Skipping image: " + filename)
                    continue

                # Determine where we're putting the clipped file on disk and determine overwrite
                # pylint: disable=unexpected-keyword-arg
                out_file = self.sensors.create_sensor_path(timestamp, filename=os.path.basename(filename),
                                                           plot=plot_name, subsensor=self.sensor_name)
                if (file_exists(out_file) and not self.overwrite):
                    # The file exists and we don't want to overwrite it
                    self.logger.warn("Skipping existing output file: %s", out_file)
                    continue

                self.log_info(resource, "Attempting to clip '" + filename +
                              "' to polygon number " + str(alternate_plot_id))

                # Create destination folder on disk if we haven't done that already
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                # Clip the raster
                bounds_tuple = polygon_to_tuples_transform(plot_poly, bounds_spatial_ref)

                clip_pix = clip_raster(filename, bounds_tuple, out_path=out_file)
                if clip_pix is None:
                    self.log_error(resource, "Failed to clip image to plot name " + plot_name)
                    continue

                # Upload the clipped image to the dataset
                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                      remove=self.overwrite)
                if not found_in_dest or self.overwrite:
                    image_name = os.path.basename(filename)
                    content = {
                        "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                        "imageName": image_name
                    }
                    if image_name in image_ids:
                        content['imageID'] = image_ids[image_name]

                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, out_file)
                    uploaded_file_ids.append(fileid)

                    # Generate our metadata
                    meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                    clowder_file.upload_metadata(connector, host, secret_key, fileid, meta)
                else:
                    self.logger.warn("Skipping existing file in dataset: %s", out_file)

                self.created += 1
                self.bytes += os.path.getsize(out_file)

            # Get the next shape to extract
            feature = layer.GetNextFeature()

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        id_len = len(uploaded_file_ids)
        if id_len > 0 or self.created > 0:
            extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                          {"files_created": uploaded_file_ids}, 'dataset')
            self.log_info(resource, "Uploading shapefile plot extractor metadata to Level_2 dataset: " +
                          str(extractor_md))
            clowder_dataset.remove_metadata(connector, host, secret_key, resource['id'],
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        else:
            self.logger.warn("Skipping dataset metadata updating since no files were loaded")

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if sensor_old_base is not None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']

    left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []
    right_ratio, left_ratio = 0, 0

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")

    if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_mask_tiff)

        #if qual_md and 'left_quality_score' in qual_md:
        #    left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
        left_ratio, left_rgb = gen_cc_enhanced(img_left)

        if left_ratio is not None and left_rgb is not None:
            # Bands must be reordered to avoid swapping R and B
            left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
            create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None, False, self.extractor_info, terra_md_full)
            compress_geotiff(left_rgb_mask_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_mask_tiff)
        else:
            # If the masked version was not generated, delete any old version as well
            self.log_info(resource, "a faulty version exists; deleting %s" % left_rgb_mask_tiff)
            os.remove(left_rgb_mask_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_rgb_mask_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_rgb_mask_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:

            right_ratio, right_rgb = gen_cc_enhanced(img_right)

            if right_ratio is not None and right_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(right_rgb, right_bounds, right_rgb_mask_tiff, None, False, self.extractor_info, terra_md_full)
                compress_geotiff(right_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(right_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old version as well
                self.log_info(resource, "a faulty version exists; deleting %s" % right_rgb_mask_tiff)
                os.remove(right_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_rgb_mask_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        md = {
            "files_created": uploaded_file_ids,
            "left_mask_ratio": left_ratio
        }
        if not self.leftonly:
            md["right_mask_ratio"] = right_ratio
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def perform_uploads(self, connector, host, secret_key, resource, default_dsid, content, season_name, experiment_name, timestamp):
    """Performs the uploading of all the files we've put onto the upload list

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        default_dsid(str): the default dataset to load files to
        content(str): content information for the files we're uploading
        season_name(str): the name of the season
        experiment_name(str): the name of the experiment
        timestamp(str): the timestamp string associated with the source dataset

    Notes:
        We loop through the files, compressing and remapping the names as needed. If the sensor
        associated with the file is missing, we upload the file to the default dataset. Otherwise,
        we use the dataset associated with the sensor and create the dataset if necessary.
    """
    for one_file in self.files_to_upload:
        sourcefile = os.path.join(one_file["source_path"], one_file["source_name"])

        # Make sure we have the original file and then compress it if needed, or rename it
        if os.path.isfile(sourcefile):
            # Make sure we have the full destination path
            if not os.path.exists(one_file["dest_path"]):
                os.makedirs(one_file["dest_path"])

            resultfile = os.path.join(one_file["dest_path"], one_file["dest_name"])
            if one_file["compress"]:
                resultfile = resultfile + ".zip"
                with open(sourcefile, 'rb') as f_in:
                    with gzip.open(resultfile, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
            elif not sourcefile == resultfile:
                shutil.move(sourcefile, resultfile)

            # Find or create the target dataset for this entry if it doesn't exist
            cur_dataset_id = default_dsid
            if "sensor" in one_file:
                sensor_type = one_file["sensor"]
                if sensor_type in self.sensor_dsid_map:
                    cur_dataset_id = self.sensor_dsid_map[sensor_type]
                else:
                    new_sensor = Sensors(base=self.sensors.base, station=self.sensors.station, sensor=sensor_type)
                    sensor_leaf_name = new_sensor.get_display_name() + ' - ' + timestamp

                    ds_exists = get_datasetid_by_name(host, secret_key, sensor_leaf_name)
                    new_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                             self.clowderspace, season_name, experiment_name,
                                                             new_sensor.get_display_name(),
                                                             timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                             leaf_ds_name=sensor_leaf_name)

                    if (self.overwrite_ok or not ds_exists) and self.experiment_metadata:
                        self.update_dataset_extractor_metadata(connector, host, secret_key, new_dsid,
                                                               prepare_pipeline_metadata(self.experiment_metadata),
                                                               self.extractor_info['name'])

                    self.sensor_dsid_map[sensor_type] = new_dsid
                    cur_dataset_id = new_dsid

            # Check if file already exists in the dataset
            file_in_dataset = check_file_in_dataset(connector, host, secret_key, cur_dataset_id, resultfile, remove=False)

            # If the file is already in the dataset, determine if we need to delete it first
            if self.overwrite_ok and file_in_dataset:
                # Delete the file from the dataset before uploading the new copy
                self.log_info(resource, "Removing existing file in dataset " + resultfile)
                check_file_in_dataset(connector, host, secret_key, cur_dataset_id, resultfile, remove=True)
            elif not self.overwrite_ok and file_in_dataset:
                # We won't overwrite an existing file
                self.log_skip(resource, "Not overwriting existing file in dataset " + resultfile)
                continue

            # Upload the file to the dataset
            fid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, cur_dataset_id, resultfile)

            # Generate our metadata
            meta = build_metadata(host, self.extractor_info, fid, content, 'file')

            # Upload the metadata to the dataset
            upload_metadata(connector, host, secret_key, fid, meta)

            self.created += 1
            self.bytes += os.path.getsize(resultfile)
        else:
            raise Exception("%s was not found" % sourcefile)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']

    left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
        EI = getEnhancedImage(img_left)
        create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(left_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_rgb_enh_tiff,
                                          remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(right_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
        EI = getEnhancedImage(img_right)
        create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(right_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_rgb_enh_tiff,
                                          remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid, {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Clean tmp directory from any potential failed previous runs
    flist = os.listdir("/tmp")
    for f in flist:
        try:
            os.remove(os.path.join("/tmp", f))
        except:
            pass

    """ if file is above configured limit, skip it
    max_gb = 24  # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
    for fname in resource['local_paths']:
        if fname.endswith('raw'):
            rawfile = fname
            rawsize = os.stat(rawfile).st_size
    if rawsize > max_gb * 1000000000:
        self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
        return False
    """

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_rawname = 'SWIR'
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_rawname = 'VNIR'
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    out_nc = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)
    xps_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['xps'])
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['ind'])
    csv_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname.replace("_netcdf", "_traits"))

    raw_file, terra_md_full = None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
        elif fname.endswith('raw'):
            raw_file = fname
    if None in [raw_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, sensor_rawname, terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    print_name = self.sensors.get_display_name(sensor=sensor_fullname)
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, print_name,
                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, print_name,
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    uploaded_file_ids = []

    # Perform actual processing
    if (not file_exists(out_nc)) or self.overwrite:
        """TODO: OLD AND NOT USED
        self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
        if soil_mask and file_exists(soil_mask):
            # If soil mask exists, we can generate an _ind indices file
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "-m", soil_mask, "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        else:
            # Otherwise we cannot, and need to trigger soilmask extractor and circle back later
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        if returncode != 0:
            raise ValueError('script encountered an error')
        """

        self.log_info(resource, 'invoking python calibration to create: %s' % out_nc)
        create_empty_netCDF(raw_file, out_nc)
        self.log_info(resource, 'applying calibration to: %s' % out_nc)
        apply_calibration(raw_file, out_nc)
        self.log_info(resource, 'done applying calibration to %s' % raw_file)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_nc, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_nc)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(out_nc)

        # TODO: Still compatible?
        #if not soil_mask:
        #    self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
        #    submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

        # TODO: Send output to BETYdb
        """
        # Send indices to betyDB
        if file_exists(ind_file):
            # TODO: Use ncks to trim ind_file to plots before this step
            plot_no = 'Full Field'

            with Dataset(ind_file, "r") as netCDF_handle:
                ndvi = netCDF_handle.get_variables_by_attributes(standard_name='normalized_difference_chlorophyll_index_750_705')
                NDVI705 = ndvi[0].getValue().ravel()[0]

                # TODO: Map the remaining ~50 variables in BETY to create indices file
                # TODO: In netCDF header,
                csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                             'citation_author,citation_year,citation_title,method'
                csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                           'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                               timestamp, NDVI705, plot_no)
                with open(csv_file, 'w') as c:
                    c.write(csv_header + '\n' + csv_vals)

            # TODO: Send this CSV to betydb & geostreams extractors instead
            submit_traits(csv_file, bety_key=self.bety_key)
        """

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']

    left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image quality")
    left_qual = getImageQuality(img_left)
    if not self.leftonly:
        right_qual = getImageQuality(img_right)

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_nrmac_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_nrmac_tiff)
        create_geotiff(np.array([[left_qual, left_qual], [left_qual, left_qual]]), left_bounds,
                       left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_nrmac_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        self.log_info(resource, "uploading %s" % left_nrmac_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_nrmac_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_nrmac_tiff) or self.overwrite):
            self.log_info(resource, "creating %s" % right_nrmac_tiff)
            create_geotiff(np.array([[right_qual, right_qual], [right_qual, right_qual]]), right_bounds,
                           right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_nrmac_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % right_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    md = {
        "files_created": uploaded_file_ids,
        "left_quality_score": left_qual
    }
    if not self.leftonly:
        md["right_quality_score"] = right_qual
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'file')
    self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)