def check_message(self, connector, host, secret_key, resource, parameters): if "rulechecked" in parameters and parameters["rulechecked"]: return CheckMessage.download self.start_check(resource) if not is_latest_file(resource): self.log_skip(resource, "not latest file") return CheckMessage.ignore # Check for a left and right BIN file - skip if not found if not contains_required_files(resource, ['_left.bin', '_right.bin']): self.log_skip(resource, "missing required files") return CheckMessage.ignore # Check metadata to verify we have what we need md = download_metadata(connector, host, secret_key, resource['id']) if get_terraref_metadata(md): if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']): # Make sure outputs properly exist timestamp = resource['dataset_info']['name'].split(" - ")[1] left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left']) right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right']) if file_exists(left_tiff) and file_exists(right_tiff): self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version']) # Have TERRA-REF metadata, but not any from this extractor return CheckMessage.download else: self.log_error(resource, "no terraref metadata found; sending to cleaner") submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner") return CheckMessage.ignore
def submit_missing_timestamps(sensor_name, target, date):
    sensordef = count_defs[sensor_name]
    targetdef = sensordef[target]
    extractorname = targetdef["extractor"]
    submitted = []
    notfound = []

    if "parent" in targetdef:
        # Count expected parent counts & actual current progress counts from filesystem
        parentdef = sensordef[targetdef["parent"]]
        parent_dir = os.path.join(parentdef["path"], date)
        target_dir = os.path.join(targetdef["path"], date)
        parent_timestamps = os.listdir(parent_dir)
        if os.path.isdir(target_dir):
            target_timestamps = os.listdir(target_dir)
        else:
            target_timestamps = []

        disp_name = Sensors("", "ua-mac").get_display_name(targetdef["parent"])

        missing = list(set(parent_timestamps) - set(target_timestamps))
        for ts in missing:
            if ts.find("-") > -1 and ts.find("__") > -1:
                dataset_name = disp_name + " - " + ts
                raw_dsid = get_dsid_by_name(dataset_name)
                if raw_dsid:
                    submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, extractorname)
                    submitted.append({"name": dataset_name, "id": raw_dsid})
                else:
                    notfound.append({"name": dataset_name})

    return json.dumps({
        "extractor": extractorname,
        "datasets submitted": submitted,
        "datasets not found": notfound
    })
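# A minimal sketch of the count_defs structure these helpers appear to expect, inferred
# from how the dict is accessed above. The sensor/target keys, paths, and the
# raw-to-Level_1 pairing below are illustrative assumptions, not the project's actual
# configuration; only the extractor name "terra.stereo-rgb.bin2tif" appears elsewhere
# in this code.
count_defs_example = {
    "stereoTop": {
        "stereoTop": {                                    # parent (raw) definition
            "path": "/home/clowder/sites/ua-mac/raw_data/stereoTop"
        },
        "rgb_geotiff": {                                  # target (Level_1) definition
            "parent": "stereoTop",
            "path": "/home/clowder/sites/ua-mac/Level_1/rgb_geotiff",
            "extractor": "terra.stereo-rgb.bin2tif"
        }
    }
}

# Hypothetical invocation: resubmit any 2018-05-01 stereoTop timestamps with no
# rgb_geotiff output yet (assumes count_defs, CONN, CLOWDER_HOST, CLOWDER_KEY are set).
# print(submit_missing_timestamps("stereoTop", "rgb_geotiff", "2018-05-01"))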
def submit_rulecheck(sensor_name, target, date):
    sensordef = count_defs[sensor_name]
    targetdef = sensordef[target]
    submitted = []
    s = Sensors("", "ua-mac")

    if "parent" in targetdef:
        target_dir = os.path.join(sensordef[targetdef["parent"]]["path"], date)
        target_timestamps = os.listdir(target_dir)

        disp_name = s.get_display_name(targetdef["parent"])

        for ts in target_timestamps:
            if ts.find("-") > -1 and ts.find("__") > -1:  # TODO: and os.listdir(os.path.join(target_dir, ts)):
                # Get first populated timestamp for the date that has a Clowder ID
                dataset_name = disp_name + " - " + ts
                raw_dsid = get_dsid_by_name(dataset_name)
                if raw_dsid:
                    # Submit associated Clowder ID to rulechecker
                    submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, "ncsa.rulechecker.terra")
                    submitted.append({"name": dataset_name, "id": raw_dsid})
                    break

    return json.dumps({
        "extractor": "ncsa.rulechecker.terra",
        "datasets submitted": submitted
    })
def check_message(self, connector, host, secret_key, resource, parameters): if "rulechecked" in parameters and parameters["rulechecked"]: return CheckMessage.download self.start_check(resource) if not is_latest_file(resource): self.log_skip(resource, "not latest file") return CheckMessage.ignore # Check for a left and right BIN file - skip if not found if not contains_required_files(resource, ['_left.tif', '_right.tif']): self.log_skip(resource, "missing required files") # Check for raw_data_source in metadata and resumbit to bin2tif if available... md = download_metadata(connector, host, secret_key, resource['id']) terra_md = get_terraref_metadata(md) if 'raw_data_source' in terra_md: raw_id = str(terra_md['raw_data_source'].split("/")[-1]) self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id) submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif") return CheckMessage.ignore # Check metadata to verify we have what we need md = download_metadata(connector, host, secret_key, resource['id']) if get_terraref_metadata(md): if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']): # Make sure outputs properly exist timestamp = resource['dataset_info']['name'].split(" - ")[1] left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left']) right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right']) if (self.leftonly and file_exists(left_nrmac_tiff)) or ( not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)): if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]): self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version']) return CheckMessage.ignore else: self.log_info(resource, "output file exists but not yet uploaded") # Have TERRA-REF metadata, but not any from this extractor return CheckMessage.download else: self.log_skip(resource, "no terraref metadata found") return CheckMessage.ignore
def submit_missing_timestamps_from_rulechecker(sensor_name, target, date):
    sensordef = count_defs[sensor_name]
    targetdef = sensordef[target]
    extractorname = targetdef["extractor"]
    submitted = []
    notfound = []

    if "parent" in targetdef:
        # Count expected parent counts from filesystem
        parentdef = sensordef[targetdef["parent"]]
        parent_dir = os.path.join(parentdef["path"], date)
        parent_timestamps = os.listdir(parent_dir)

        # Count actual current progress counts from PSQL
        psql_conn = connect_to_psql()
        target_timestamps = []
        query_string = targetdef["query_list"] % date
        curs = psql_conn.cursor()
        curs.execute(query_string)
        for result in curs:
            target_timestamps.append(result[0].split("/")[-2])

        disp_name = Sensors("", "ua-mac").get_display_name(targetdef["parent"])

        missing = list(set(parent_timestamps) - set(target_timestamps))
        for ts in missing:
            if ts.find("-") > -1 and ts.find("__") > -1:
                dataset_name = disp_name + " - " + ts
                raw_dsid = get_dsid_by_name(dataset_name)
                if raw_dsid:
                    submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, extractorname)
                    submitted.append({"name": dataset_name, "id": raw_dsid})
                else:
                    notfound.append({"name": dataset_name})

    return json.dumps({
        "extractor": extractorname,
        "datasets submitted": submitted,
        "datasets not found": notfound
    })
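# Illustration only: the "query_list" template and the row format below are assumptions
# inferred from how the cursor results are parsed above (result[0] is a file path whose
# second-to-last component is the timestamp directory), not the actual query used.
#
#   query_list: "SELECT file_path FROM extractor_results WHERE file_path LIKE '%%/%s/%%'"
#   example row: "/home/clowder/sites/ua-mac/Level_1/rgb_geotiff/2018-05-01/2018-05-01__10-12-34-567/foo_left.tif"
#   -> "2018-05-01__10-12-34-567" is appended to target_timestamps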
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    terra_md_full = None
    spatial_meta = None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
            if 'spatial_metadata' in terra_md_full:
                spatial_meta = terra_md_full['spatial_metadata']
            else:
                spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }

        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            if f.endswith("_left.tif"):
                side = "left"
            else:
                side = "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }

        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }

        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
            if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
                    season_name, experiment_name, plot_display_name,
                    timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                            self.clowderspace, season_name, experiment_name,
                                                            plot_display_name, timestamp[:4], timestamp[5:7],
                                                            timestamp[8:10], leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    """If file is LAS, we can merge with any existing scan+plot output safely"""
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check if contents
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
        "files_created": uploaded_file_ids
    }, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
        season_name, experiment_name, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    sensor_type, timestamp = resource['name'].split(" - ")

    # First, re-check metadata to verify it hasn't been added in meantime
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    terra_md = get_terraref_metadata(ds_md)
    if terra_md:
        self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
        return

    # These datasets do not have TERRA md
    uncleanables = ["Full Field"]
    if sensor_type in uncleanables:
        self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
        return

    # For these datasets, we must get TERRA md from raw_data source
    lv1_types = {"RGB GeoTIFFs": "stereoTop", "Thermal IR GeoTIFFs": "flirIrCamera"}
    if sensor_type in lv1_types:
        raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
    else:
        # Search for metadata.json source file
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
    source_dir = self.remapMountPath(connector, source_dir)

    if self.delete:
        # Delete all existing metadata from this dataset
        self.log_info(resource, "Deleting existing metadata")
        delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

    # TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
    if sensor_type == "scanner3DTop":
        source_dir = source_dir.replace("Level_1", "raw_data")

    self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
    if os.path.isdir(source_dir):
        md_file = None
        for f in os.listdir(source_dir):
            if f.endswith("metadata.json"):
                md_file = os.path.join(source_dir, f)
        if md_file:
            self.log_info(resource, "Found metadata.json; cleaning")
            md_json = clean_metadata(load_json_file(md_file), sensor_type)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": md_json,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
                }
            }
            self.log_info(resource, "Uploading cleaned metadata")
            upload_metadata(connector, host, secret_key, resource['id'], format_md)

            # Now trigger a callback extraction if given
            if len(self.callback) > 0:
                self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
                submit_extraction(connector, host, secret_key, resource['id'], self.callback)
            else:
                callbacks = self.get_callbacks_by_sensor(sensor_type)
                if callbacks:
                    for c in callbacks:
                        self.log_info(resource, "Submitting callback extraction to %s" % c)
                        submit_extraction(connector, host, secret_key, resource['id'], c)
                else:
                    self.log_info(resource, "No default callback found for %s" % sensor_type)
        else:
            self.log_error(resource, "metadata.json not found in %s" % source_dir)
    else:
        self.log_error(resource, "%s could not be found" % source_dir)

    # TODO: Have extractor check for existence of Level_1 output product and delete if exists?
    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get BIN file and metadata
    bin_file, terra_md_full = None, None
    for f in resource['local_paths']:
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
        season_name, experiment_name, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    tiff_path = self.sensors.create_sensor_path(timestamp)
    png_path = tiff_path.replace(".tif", ".png")
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product
    self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    terra_md_trim = get_terraref_metadata(all_dsmd)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    skipped_png = False
    if not file_exists(png_path) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating & uploading %s" % png_path)
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(tiff_path) or self.overwrite:
        # Generate temperature matrix and perform actual processing
        self.log_info(resource, "creating & uploading %s" % tiff_path)
        gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, terra_md_full)  # get temperature
        create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    sensor_type, timestamp = resource['name'].split(" - ")
    targets = self.get_targets_by_sensor(sensor_type)
    source = self.get_source_by_sensor(sensor_type)

    existing_files = {}
    for t in targets:
        for f in resource['files']:
            if f['filename'].endswith(t):
                logging.getLogger(__name__).info("Found %s" % f['filename'])
                existing_files[t] = f['filename']
                break

    if len(existing_files) == len(targets):
        logging.getLogger(__name__).info("Target files already exist")
        # If there are bin2tif files previously created, are they valid?
        dsmd = download_metadata(connector, host, secret_key, resource['id'])
        for md in dsmd:
            if 'extractor_id' in md['agent'] and md['agent']['extractor_id'].endswith(source):
                # Found bin2tif metadata - are previously created files valid?
                logging.getLogger(__name__).info("Found metadata from %s" % source)
                for url in md['content']['files_created']:
                    fid = url.split("/")[-1]
                    i = download_info(connector, host, secret_key, fid)
                    i = self.remapMountPath(connector, i['filepath'])
                    logging.getLogger(__name__).info("Checking validity of %s" % i)
                    if not os.path.isfile(i):
                        # Found invalid file - nuke the entire site from orbit
                        logging.getLogger(__name__).info("Invalid; deleting metadata")
                        self.delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'], source)

                        # Now trigger a callback extraction if given
                        if len(self.callback) > 0:
                            logging.getLogger(__name__).info("Submitting callback extraction to %s" % self.callback)
                            submit_extraction(connector, host, secret_key, resource['id'], self.callback)
                        else:
                            callbacks = self.get_callbacks_by_sensor(sensor_type)
                            if callbacks:
                                for c in callbacks:
                                    logging.getLogger(__name__).info("Submitting callback extraction to %s" % c)
                                    submit_extraction(connector, host, secret_key, resource['id'], c)
                            else:
                                logging.getLogger(__name__).info("No default callback found for %s" % sensor_type)
                        break
    else:
        # Search for target source files
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
        source_dir = self.remapMountPath(connector, source_dir)
        if sensor_type == "scanner3DTop":
            source_dir = source_dir.replace("Level_1", "raw_data")

        logging.getLogger(__name__).info("Searching for target files in %s" % source_dir)
        if os.path.isdir(source_dir):
            targ_files = {}
            for f in os.listdir(source_dir):
                for t in targets:
                    if f.endswith(t):
                        targ_files[t] = os.path.join(source_dir, f)
                        break

            if targ_files != {}:
                for t in targ_files:
                    logging.getLogger(__name__).info("Uploading %s" % targ_files[t])
                    upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['id'], targ_files[t])

                # Now trigger a callback extraction if given
                if len(self.callback) > 0:
                    logging.getLogger(__name__).info("Submitting callback extraction to %s" % self.callback)
                    submit_extraction(connector, host, secret_key, resource['id'], self.callback)
                else:
                    callbacks = self.get_callbacks_by_sensor(sensor_type)
                    if callbacks:
                        for c in callbacks:
                            logging.getLogger(__name__).info("Submitting callback extraction to %s" % c)
                            submit_extraction(connector, host, secret_key, resource['id'], c)
                    else:
                        logging.getLogger(__name__).info("No default callback found for %s" % sensor_type)
            else:
                logging.getLogger(__name__).error("targets not found in %s" % source_dir)
        else:
            logging.getLogger(__name__).info("%s could not be found" % source_dir)
"@type": "cat:user", "user_id": "%sapi/users/%s" % (clowder_host, clowder_uid) } } if not dry_run: file_id = upload_to_dataset(conn, clowder_host, clowder_user, clowder_pass, snap_dataset, img_path) logger.debug("Created file %s [%s]" % (img_file, file_id)) file_md["file_id"] = file_id upload_file_metadata(conn, clowder_host, clowder_key, file_id, file_md) logger.debug("Uploaded metadata to [%s]" % file_id) else: logger.debug("Skipping file %s [%s]" % (img_file, "DRY RUN")) # Submit new dataset for extraction to plantCV extractor if not dry_run: extractor = "terra.lemnatec.plantcv" logger.debug("Submitting dataset [%s] to %s" % (snap_dataset, extractor)) submit_extraction(conn, clowder_host, clowder_key, snap_dataset, extractor) logger.debug("Experiment uploading complete.") else: logger.debug("%s does not exist" % experiment_root) sys.exit(1)
CONN = Connector(None, mounted_paths={"/home/clowder/sites": args.sites})

logging.info("attempting to parse %s" % args.input)
sess = requests.Session()

if args.daily:
    seen_days = []

with open(args.input, 'r') as csv:
    i = 0
    for line in csv:
        ds_id, ds_name = line.replace("\n", "").split(",")
        if len(ds_id) > 0:
            if args.daily:
                day = ds_name.split(" - ")[1].split("__")[0]
                if day in seen_days:
                    continue
                else:
                    seen_days.append(day)
            try:
                submit_extraction(CONN, args.host, args.key, ds_id, args.extractor)
            except Exception as e:
                logging.info("failed to submit %s [%s]" % (ds_id, e))
            i += 1
            if (i % 1000 == 0):
                logging.info("submitted %s files" % i)
            if args.test:
                logging.info("submitted %s" % ds_id)
                break

logging.info("processing completed")
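# Example of the expected input file format (illustrative IDs and names only): one
# "dataset_id,dataset_name" pair per line, where the dataset name ends in a
# "<date>__<time>" timestamp as parsed above.
#
#   5c1a2b3c4d5e6f7a8b9c0d1e,stereoTop - 2018-05-01__10-12-34-567
#   5c1a2b3c4d5e6f7a8b9c0d1f,stereoTop - 2018-05-01__10-13-05-123
#
# With --daily set, only the first dataset seen for each date is submitted.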