def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    if not contains_required_files(resource, ['_left.bin', '_right.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'],
                                                         self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if file_exists(left_tiff) and file_exists(right_tiff):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_error(resource, "no terraref metadata found; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for an _ir.bin file before beginning processing
    if not contains_required_files(resource, ['_ir.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            tif = self.sensors.get_sensor_path(timestamp)
            png = tif.replace(".tif", ".png")
            if file_exists(png) and file_exists(tif):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right TIF file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.force:
            # Check NRMAC score > 15 before proceeding if available
            nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
            if not (nrmac_md and 'left_quality_score' in nrmac_md):
                self.log_skip(resource, "NRMAC quality score not available")
                return CheckMessage.ignore
            elif float(nrmac_md['left_quality_score']) > self.threshold:
                self.log_skip(resource, "NRMAC quality score %s is above threshold of %s" %
                              (float(nrmac_md['left_quality_score']), self.threshold))
                return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                if contains_required_files(resource, [os.path.basename(left_enh_tiff),
                                                      os.path.basename(right_enh_tiff)]):
                    self.log_skip(resource, "metadata v%s and outputs already exist" %
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    self.log_info(resource, "output files exist but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_error(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if len(resource['files']) >= 23:
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            timestamp = resource['name'].split(" - ")[1]
            out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
            out_fullday_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")
            if file_exists(out_fullday_netcdf) and file_exists(out_fullday_csv):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        return CheckMessage.download
    else:
        self.log_skip(resource, "found less than 23 files")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg',
                                              'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")

        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info, resource['id'],
                                                 clean_md, 'dataset')
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key, resource['id'], complete_md)
                    return CheckMessage.download
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right TIF file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        # Check for raw_data_source in metadata and resubmit to bin2tif if available...
        md = download_metadata(connector, host, secret_key, resource['id'])
        terra_md = get_terraref_metadata(md)
        if 'raw_data_source' in terra_md:
            raw_id = str(terra_md['raw_data_source'].split("/")[-1])
            self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
            submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if (self.leftonly and file_exists(left_nrmac_tiff)) or \
                    (not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                    self.log_skip(resource, "metadata v%s and outputs already exist" %
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    self.log_info(resource, "output file exists but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        # Check for a left and right TIF file - skip if not found
        # If we're only processing the left files, don't check for the right file
        needed_files = ['_left.tif']
        if not self.leftonly:
            needed_files.append('_right.tif')
        if not contains_required_files(resource, needed_files):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if (self.leftonly and file_exists(left_mask_tiff)) or \
                    (not self.leftonly and file_exists(left_mask_tiff) and file_exists(right_mask_tiff)):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
    # Check for other images to create a mask on
    elif not contains_required_files(resource, ['.tif']):
        self.log_skip(resource, "missing required tiff file")
        return CheckMessage.ignore

    # Have TERRA-REF metadata, but not any from this extractor
    return CheckMessage.download
def file_is_image_type(identify_binary, filename, metadata_filename=None):
    """Uses the identify application to generate the MIME type of the file and
    looks for an image MIME type.

    If a metadata filename is specified, the JSON in the file is loaded first and the MIME
    type is looked for. If the metadata filename is not specified, or a MIME type was not
    found in the metadata, the identify application is used.

    Args:
        identify_binary(str): path to the executable which will return a MIME type on an image file
        filename(str): the path to the file to check
        metadata_filename(str): the path to JSON metadata associated with the file in which to
            look for a 'contentType' tag containing the MIME type
    Returns:
        True is returned if the file is a MIME image type.
        False is returned upon failure or if the file is not a type of image.
    """
    logger = logging.getLogger(__name__)

    # Try to determine the file type from its JSON information (metadata if from Clowder API)
    try:
        if metadata_filename and file_exists(metadata_filename):
            file_md = load_json_file(metadata_filename)
            if file_md:
                if 'contentType' in file_md:
                    if file_md['contentType'].startswith('image'):
                        return True
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    # Try to determine the file type locally
    try:
        is_image_type = find_image_mime_type(
            subprocess.check_output([identify_binary, "-verbose", filename],
                                    stderr=subprocess.STDOUT))
        if is_image_type is not None:
            return is_image_type
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    return False
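# Usage sketch (not part of the original source; the binary and file paths below are
# illustrative assumptions only). The first argument is the path to ImageMagick's
# `identify` executable; the optional Clowder metadata JSON, when given, is checked first.
#
#   if file_is_image_type("/usr/bin/identify", "/data/example_left.tif",
#                         metadata_filename="/data/example_left.json"):
#       print("treating file as an image")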
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Build list of JSON files
    json_files = []
    for f in resource['files']:
        if f['filename'].endswith("_environmentlogger.json"):
            if f['filepath'].startswith("/home/clowder"):
                json_files.append(f['filepath'].replace("/home/clowder", "/home/extractor"))
            else:
                json_files.append(f['filepath'])
    json_files.sort()

    # Determine full output path
    timestamp = resource['name'].split(" - ")[1]
    out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
    temp_out_full = os.path.join(os.path.dirname(out_fullday_netcdf), "temp_full.nc")
    temp_out_single = temp_out_full.replace("_full.nc", "_single.nc")
    geo_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")

    if not file_exists(temp_out_full):
        for json_file in json_files:
            self.log_info(resource, "converting %s to netCDF & appending" %
                          os.path.basename(json_file))
            ela.mainProgramTrigger(json_file, temp_out_single)
            cmd = "ncrcat --record_append %s %s" % (temp_out_single, temp_out_full)
            subprocess.call([cmd], shell=True)
            os.remove(temp_out_single)

        shutil.move(temp_out_full, out_fullday_netcdf)
        self.created += 1
        self.bytes += os.path.getsize(out_fullday_netcdf)

    # Write out geostreams.csv
    if not file_exists(geo_csv):
        self.log_info(resource, "writing geostreams CSV")
        geo_file = open(geo_csv, 'w')
        geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source',
                                 'value', 'timestamp']) + '\n')
        with Dataset(out_fullday_netcdf, "r") as ncdf:
            streams = set([sensor_info.name for sensor_info in ncdf.variables.values()
                           if sensor_info.name.startswith('sensor')])
            for stream in streams:
                if stream != "sensor_spectrum":
                    try:
                        memberlist = ncdf.get_variables_by_attributes(sensor=stream)
                        for members in memberlist:
                            data_points = _produce_attr_dict(members)
                            for index in range(len(data_points)):
                                dp_obj = data_points[index]
                                if dp_obj["sensor"] == stream:
                                    time_format = "%Y-%m-%dT%H:%M:%S-07:00"
                                    time_point = (datetime.datetime(year=1970, month=1, day=1) +
                                                  datetime.timedelta(days=ncdf.variables["time"][index])).strftime(time_format)
                                    geo_file.write(','.join([
                                        "Full Field - Environmental Logger",
                                        "(EL) %s" % stream,
                                        str(33.075576),
                                        str(-111.974304),
                                        time_point,
                                        host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id'],
                                        '"%s"' % json.dumps(dp_obj).replace('"', '""'),
                                        timestamp
                                    ]) + '\n')
                    except:
                        self.log_error(resource, "NetCDF attribute not found: %s" % stream)

    # Fetch dataset ID by dataset name if not provided
    target_dsid = build_dataset_hierarchy_crawl(
        host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
        None, None, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10],
        leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    ds_files = get_file_list(connector, host, secret_key, target_dsid)
    found_full = False
    found_csv = False
    for f in ds_files:
        if f['filename'] == os.path.basename(out_fullday_netcdf):
            found_full = True
        if f['filename'] == os.path.basename(geo_csv):
            found_csv = True
    if not found_full:
        upload_to_dataset(connector, host, secret_key, target_dsid, out_fullday_netcdf)
    if not found_csv:
        geoid = upload_to_dataset(connector, host, secret_key, target_dsid, geo_csv)
        self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"output_dataset": target_dsid}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def generateDarkerMosaic(self, connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                         out_tif_full, out_tif_medium, parameters, resource):
    # Create dark-pixel mosaic from geotiff list using multipass for darker pixel selection
    created, bytes = 0, 0

    if ((os.path.isfile(out_vrt) and os.path.getsize(out_vrt) == 0) or
            (not os.path.isfile(out_vrt)) or self.overwrite):
        fileidpath = self.remapMountPath(connector, str(parameters['file_paths']))
        with open(fileidpath) as flist:
            file_path_list = json.load(flist)
        self.log_info(resource, "processing %s TIFs with dark flag" % len(file_path_list))

        # Write input list to tmp file
        tiflist = "tiflist.txt"
        with open(tiflist, "w") as tifftxt:
            for tpath in file_path_list:
                filepath = self.remapMountPath(connector, tpath)
                tifftxt.write("%s\n" % filepath)

        # Create VRT from every GeoTIFF
        self.log_info(resource, "Creating VRT %s..." % out_vrt)
        full_day_to_tiles.createVrtPermanent(out_dir, tiflist, out_vrt)
        created += 1
        bytes += os.path.getsize(out_vrt)

        # Split full tiflist into parts according to split number
        shade.split_tif_list(tiflist, out_dir, self.split)

        # Generate tiles from each split VRT into numbered folders
        shade.create_diff_tiles_set(out_dir, self.split)

        # Choose darkest pixel from each overlapping tile
        unite_tiles_dir = os.path.join(out_dir, 'unite')
        if not os.path.exists(unite_tiles_dir):
            os.mkdir(unite_tiles_dir)
        shade.integrate_tiles(out_dir, unite_tiles_dir, self.split)

        # If any files didn't have overlap, copy individual tile
        shade.copy_missing_tiles(out_dir, unite_tiles_dir, self.split,
                                 tiles_folder_name='tiles_left')

        # Create output VRT from overlapped tiles
        shade.create_unite_tiles(unite_tiles_dir, out_vrt)
        created += 1
        bytes += os.path.getsize(out_vrt)

    if (not file_exists(out_tif_thumb)) or self.overwrite:
        self.log_info(resource, "Converting VRT to %s..." % out_tif_thumb)
        subprocess.call("gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " +
                        "-outsize %s%% %s%% %s %s" % (2, 2, out_vrt, out_tif_thumb),
                        shell=True)
        created += 1
        bytes += os.path.getsize(out_tif_thumb)

    if not self.thumb:
        if (not file_exists(out_tif_medium)) or self.overwrite:
            self.log_info(resource, "Converting VRT to %s..." % out_tif_medium)
            subprocess.call("gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " +
                            "-outsize %s%% %s%% %s %s" % (10, 10, out_vrt, out_tif_medium),
                            shell=True)
            created += 1
            bytes += os.path.getsize(out_tif_medium)

        if self.full and (not file_exists(out_tif_full) or self.overwrite):
            if (not os.path.isfile(out_tif_full)) or self.overwrite:
                logging.info("Converting VRT to %s..." % out_tif_full)
                subprocess.call("gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " +
                                "%s %s" % (out_vrt, out_tif_full),
                                shell=True)
                created += 1
                bytes += os.path.getsize(out_tif_full)

    return (created, bytes)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # rulechecker provided some key information for us in parameters
    if type(parameters) is str:
        parameters = json.loads(parameters)
    if 'parameters' in parameters:
        parameters = parameters['parameters']
    if type(parameters) is unicode:
        parameters = json.loads(str(parameters))
    dataset_name = parameters["output_dataset"]
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

    timestamp = dataset_name.split(" - ")[1]

    # Input path will suggest which sensor we are seeing
    sensor_name, sensor_lookup = None, None
    for f in resource['files']:
        if f['filepath'].find("rgb_geotiff") > -1:
            sensor_name = "stereoTop"
            sensor_lookup = "rgb_fullfield"
        elif f['filepath'].find("ir_geotiff") > -1:
            sensor_name = "flirIrCamera"
            sensor_lookup = "ir_fullfield"
        elif f['filepath'].find("laser3d_heightmap") > -1:
            sensor_name = "scanner3DTop"
            sensor_lookup = "laser3d_fullfield"
        if sensor_lookup is not None:
            break

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, sensor_name, {})
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output file paths
    out_tif_full = self.sensors.create_sensor_path(
        timestamp, sensor=sensor_lookup, opts=[scan_name]).replace(" ", "_")
    out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
    out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
    out_png = out_tif_full.replace(".tif", ".png")
    out_vrt = out_tif_full.replace(".tif", ".vrt")
    out_dir = os.path.dirname(out_vrt)

    # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

    # If outputs already exist, we don't need to do anything else
    found_all = True
    if self.thumb:
        output_files = [out_vrt, out_tif_thumb]
    else:
        output_files = [out_tif_full, out_tif_medium, out_png]
    for output_file in output_files:
        if not file_exists(output_file):
            found_all = False
            break
    if found_all and not self.overwrite:
        if self.thumb:
            self.log_info(resource,
                          "thumb output already exists; triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                               "parameters": parameters}))
            r.raise_for_status()
        else:
            self.log_skip(resource, "all outputs already exist")
        return

    # Perform actual field stitching
    if not self.darker or sensor_lookup != 'rgb_fullfield':
        (nu_created, nu_bytes) = self.generateSingleMosaic(connector, host, secret_key,
                                                           out_dir, out_vrt, out_tif_thumb,
                                                           out_tif_full, out_tif_medium,
                                                           parameters, resource)
    else:
        (nu_created, nu_bytes) = self.generateDarkerMosaic(connector, host, secret_key,
                                                           out_dir, out_vrt, out_tif_thumb,
                                                           out_tif_full, out_tif_medium,
                                                           parameters, resource)
    self.created += nu_created
    self.bytes += nu_bytes

    if not self.thumb and os.path.isfile(out_tif_medium):
        # Create PNG thumbnail
        self.log_info(resource, "Converting 10pct to %s..." % out_png)
        cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
        subprocess.call(cmd, shell=True)
        self.created += 1
        self.bytes += os.path.getsize(out_png)

    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s" %
                  (season_name, experiment_name,
                   self.sensors.get_display_name(sensor=sensor_lookup),
                   timestamp[:4], timestamp[5:7]))

    # Get dataset ID or create it, creating parent collections as needed
    target_dsid = build_dataset_hierarchy_crawl(
        host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
        season_name, experiment_name, self.sensors.get_display_name(sensor=sensor_lookup),
        timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name)

    # Upload full field image to Clowder
    content = {
        "comment": "This stitched image is computed based on an assumption that the scene is planar. \
There are likely to be small offsets near the boundary of two images anytime there are plants \
at the boundary (because those plants are higher than the ground plane), or where the dirt is \
slightly higher or lower than average.",
        "file_ids": parameters["file_paths"]
    }

    # If we newly created these files, upload to Clowder
    if self.thumb:
        generated_files = [out_tif_thumb]
    else:
        generated_files = [out_tif_medium, out_tif_full, out_png]
    for checked_file in generated_files:
        if os.path.isfile(checked_file):
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                  checked_file)
            #, replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
            if not found_in_dest:
                id = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, checked_file)
                meta = build_metadata(host, self.extractor_info, id, content, 'file')
                upload_metadata(connector, host, secret_key, id, meta)

                if checked_file == out_tif_full:
                    # Trigger downstream extractions on full resolution
                    if sensor_lookup == 'ir_fullfield':
                        submit_extraction(connector, host, secret_key, id,
                                          "terra.multispectral.meantemp")
                    elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith("_mask.tif"):
                        submit_extraction(connector, host, secret_key, id,
                                          "terra.stereo-rgb.canopycover")

    if self.thumb:
        # TODO: Add parameters support to pyclowder submit_extraction()
        self.log_info(resource, "triggering terra.geotiff.fieldmosaic_full")
        r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                          headers={"Content-Type": "application/json"},
                          data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                           "parameters": parameters}))
        r.raise_for_status()

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key,
                                                         resource, parameters)

    self.start_message(resource)

    # Get left/right files and metadata
    process_files = []
    if self.get_terraref_metadata is not None:
        process_files = find_terraref_files(resource)
    else:
        process_files = find_image_files(self.args.identify_binary, resource,
                                         self.file_infodata_file_ending)

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
                                self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
                                                       self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    # Prepare for processing files
    timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
    target_dsid = resource['id']
    uploaded_file_ids = []
    ratios = []

    try:
        for one_file in process_files:
            mask_source = one_file

            # Make sure the source image is in the correct EPSG space
            epsg = get_epsg(one_file)
            if epsg != self.default_epsg:
                self.log_info(resource, "Reprojecting from " + str(epsg) +
                              " to default " + str(self.default_epsg))
                _, tmp_name = tempfile.mkstemp()
                src = gdal.Open(one_file)
                gdal.Warp(tmp_name, src, dstSRS='EPSG:' + str(self.default_epsg))
                mask_source = tmp_name

            # Get the bounds of the image to see if we can process it. Also get the mask filename
            rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

            if bounds is None:
                self.log_skip(resource, "Skipping non-georeferenced image: " +
                              os.path.basename(one_file))
                if mask_source != one_file:
                    os.remove(mask_source)
                continue

            if not file_exists(rgb_mask_tif) or self.overwrite:
                self.log_info(resource, "creating %s" % rgb_mask_tif)

                mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                ratios.append(mask_ratio)

                # Bands must be reordered to avoid swapping R and B
                mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)

                create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False,
                               self.extractor_info, self.get_terraref_metadata)
                compress_geotiff(rgb_mask_tif)

                # Remove any temporary file
                if mask_source != one_file:
                    os.remove(mask_source)

                self.created += 1
                self.bytes += os.path.getsize(rgb_mask_tif)

            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                  rgb_mask_tif, remove=self.overwrite)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % rgb_mask_tif)
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid, rgb_mask_tif)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if self.get_terraref_metadata is not None:
            ratios_len = len(ratios)
            left_ratio = (ratios[0] if ratios_len > 0 else None)
            right_ratio = (ratios[1] if ratios_len > 1 else None)
            md = {"files_created": uploaded_file_ids}
            if left_ratio is not None:
                md["left_mask_ratio"] = left_ratio
            if not self.leftonly and right_ratio is not None:
                md["right_mask_ratio"] = right_ratio
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
            self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'],
                            self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if sensor_old_base is not None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def generateSingleMosaic(self, connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                         out_tif_full, out_tif_medium, parameters, resource):
    # Create simple mosaic from geotiff list
    created, bytes = 0, 0

    #if (os.path.isfile(out_vrt) and os.path.getsize(out_vrt) == 0) or (not os.path.isfile(out_vrt)) or self.overwrite:
    fileidpath = self.remapMountPath(connector, str(parameters['file_paths']))
    with open(fileidpath) as flist:
        file_path_list = json.load(flist)
    self.log_info(resource, "processing %s TIFs without dark flag" % len(file_path_list))

    # Write input list to tmp file
    tiflist = "tiflist.txt"
    with open(tiflist, "w") as tifftxt:
        for tpath in file_path_list:
            filepath = self.remapMountPath(connector, tpath)
            tifftxt.write("%s\n" % filepath)

    if (self.thumb and ((not file_exists(out_vrt)) or self.overwrite)) or \
            (not self.thumb and (not file_exists(out_vrt))):
        # Create VRT from every GeoTIFF
        self.log_info(resource, "Creating VRT %s..." % out_vrt)
        if out_vrt.endswith("_mask.vrt"):
            full_day_to_tiles.createVrtPermanent(out_dir, tiflist, out_vrt, alpha=True)
        else:
            full_day_to_tiles.createVrtPermanent(out_dir, tiflist, out_vrt)
        os.remove(tiflist)
        created += 1
        bytes += os.path.getsize(out_vrt)

    if (not file_exists(out_tif_thumb)) or self.overwrite:
        # Omit _mask.vrt from 2%
        if not out_vrt.endswith('_mask.vrt'):
            self.log_info(resource, "Converting VRT to %s..." % out_tif_thumb)
            cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                  "-outsize %s%% %s%% %s %s" % (2, 2, out_vrt, out_tif_thumb)
            subprocess.call(cmd, shell=True)
            created += 1
            bytes += os.path.getsize(out_tif_thumb)

    if not self.thumb:
        if (not file_exists(out_tif_medium)) or self.overwrite:
            # Omit _mask.vrt and _nrmac.vrt from 10%
            if not (out_vrt.endswith('_mask.vrt') or out_vrt.endswith('_nrmac.vrt')):
                self.log_info(resource, "Converting VRT to %s..." % out_tif_medium)
                cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                      "-outsize %s%% %s%% %s %s" % (10, 10, out_vrt, out_tif_medium)
                subprocess.call(cmd, shell=True)
                created += 1
                bytes += os.path.getsize(out_tif_medium)

        if (not file_exists(out_tif_full)) or self.overwrite:
            # Omit _nrmac.vrt from 100%
            if not out_vrt.endswith('_nrmac.vrt'):
                logging.info("Converting VRT to %s..." % out_tif_full)
                cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                      "%s %s" % (out_vrt, out_tif_full)
                subprocess.call(cmd, shell=True)
                created += 1
                bytes += os.path.getsize(out_tif_full)

    return (created, bytes)
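# For reference (a sketch mirroring the calls above, with hypothetical output paths): the 2%
# thumbnail is a plain GDAL downsample of the day's VRT clipped to the fixed field bounds:
#
#   subprocess.call("gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES "
#                   "-projwin -111.9750963 33.0764953 -111.9747967 33.074485715 "
#                   "-outsize 2% 2% fullfield.vrt fullfield_thumb.tif", shell=True)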
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    self.start_message(resource)
    super(ClipByShape, self).process_message(connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Initialize local variables
    dataset_name = parameters["datasetname"]
    season_name, experiment_name = "Unknown Season", "Unknown Experiment"
    datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

    # Array containing the links to uploaded files
    uploaded_file_ids = []

    # Find the files we're interested in
    # pylint: disable=line-too-long
    (shapefile, shxfile, dbffile, imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                                            resource['triggering_file'])
    # pylint: enable=line-too-long
    if shapefile is None:
        self.log_skip(resource, "No shapefile found")
        return
    if shxfile is None:
        self.log_skip(resource, "No SHX file found")
        return
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
                                self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
                                                       self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    try:
        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        # Get timestamps. Also get season and experiment information for Clowder collections
        datestamp = self.find_datestamp(dataset_name)
        timestamp = timestamp_to_terraref(self.find_timestamp(dataset_name))
        (season_name, experiment_name, _) = self.get_season_and_experiment(datestamp,
                                                                           self.sensor_name)

        if self.experiment_metadata:
            if 'extractors' in self.experiment_metadata:
                extractor_json = self.experiment_metadata['extractors']
                if 'shapefile' in extractor_json:
                    if 'plot_column_name' in extractor_json['shapefile']:
                        plot_name_idx = extractor_json['shapefile']['plot_column_name']

        # Check our current local variables
        if dbffile is None:
            self.log_info(resource, "DBF file not found, using default plot naming")
        self.log_info(resource, "Extracting plots using shapefile '" +
                      os.path.basename(shapefile) + "'")

        # Load the shapes and find the plot name column if we have a DBF file
        shape_in = ogr.Open(shapefile)
        layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
        feature = layer.GetNextFeature()
        layer_ref = layer.GetSpatialRef()

        if dbffile:
            shape_table = DBF(dbffile, lowernames=True, ignore_missing_memofile=True)
            shape_rows = iter(list(shape_table))

            # Make sure if we have the column name of plot-names specified that it exists in
            # the shapefile
            column_names = shape_table.field_names
            if plot_name_idx is not None:
                if not find_all_plot_names(plot_name_idx, column_names):
                    ValueError(resource, "Shapefile data does not have specified plot name" +
                               " column '" + plot_name_idx + "'")

            # Lookup a plot name field to use
            if plot_name_idx is None:
                for one_name in column_names:
                    # pylint: disable=line-too-long
                    if one_name == "observationUnitName":
                        plot_name_idx = one_name
                        break
                    elif (one_name.find('plot') >= 0) and ((one_name.find('name') >= 0) or one_name.find('id')):
                        plot_name_idx = one_name
                        break
                    elif one_name == 'id':
                        plot_name_idx = one_name
                        break
                    # pylint: enable=line-too-long
            if plot_name_idx is None:
                ValueError(resource, "Shapefile data does not have a plot name field '" +
                           os.path.basename(dbffile) + "'")

        # Setup for the extracted plot images
        plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
            " (By Plot)"

        # Loop through each polygon and extract plot level data
        alternate_plot_id = 0
        while feature:
            # Current geometry to extract
            plot_poly = feature.GetGeometryRef()
            if layer_ref:
                plot_poly.AssignSpatialReference(layer_ref)
            plot_spatial_ref = plot_poly.GetSpatialReference()

            # Determine the plot name to use
            plot_name = None
            alternate_plot_id = alternate_plot_id + 1
            if shape_rows and plot_name_idx:
                try:
                    row = next(shape_rows)
                    plot_name = get_plot_name(plot_name_idx, row)
                except StopIteration:
                    pass
            if not plot_name:
                plot_name = "plot_" + str(alternate_plot_id)

            # Determine output dataset name
            leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
            self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                          (season_name, experiment_name, plot_display_name,
                           datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_dataset))

            # Create the dataset, even if we have no data to put in it, so that the caller knows
            # it was addressed
            target_dsid = build_dataset_hierarchy_crawl(
                host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                season_name, experiment_name, plot_display_name,
                datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_ds_name=leaf_dataset)

            # Loop through all the images looking for overlap
            for filename in imagefiles:
                # Get the bounds. We also get the reference systems in case we need to convert
                # between them
                bounds = imagefiles[filename]['bounds']
                bounds_spatial_ref = bounds.GetSpatialReference()

                # Checking for geographic overlap and skip if there is none
                if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                    # We need to convert coordinate system before an intersection
                    transform = osr.CoordinateTransformation(bounds_spatial_ref,
                                                             plot_spatial_ref)
                    new_bounds = bounds.Clone()
                    if new_bounds:
                        new_bounds.Transform(transform)
                        intersection = plot_poly.Intersection(new_bounds)
                        new_bounds = None
                else:
                    # Same coordinate system. Simple intersection
                    intersection = plot_poly.Intersection(bounds)

                if intersection.GetArea() == 0.0:
                    self.log_info(resource, "Skipping image: " + filename)
                    continue

                # Determine where we're putting the clipped file on disk and determine overwrite
                # pylint: disable=unexpected-keyword-arg
                out_file = self.sensors.create_sensor_path(timestamp,
                                                           filename=os.path.basename(filename),
                                                           plot=plot_name,
                                                           subsensor=self.sensor_name)
                if (file_exists(out_file) and not self.overwrite):
                    # The file exists and don't want to overwrite it
                    self.logger.warn("Skipping existing output file: %s", out_file)
                    continue

                self.log_info(resource, "Attempting to clip '" + filename +
                              "' to polygon number " + str(alternate_plot_id))

                # Create destination folder on disk if we haven't done that already
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                # Clip the raster
                bounds_tuple = polygon_to_tuples_transform(plot_poly, bounds_spatial_ref)

                clip_pix = clip_raster(filename, bounds_tuple, out_path=out_file)
                if clip_pix is None:
                    self.log_error(resource, "Failed to clip image to plot name " + plot_name)
                    continue

                # Upload the clipped image to the dataset
                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      out_file, remove=self.overwrite)
                if not found_in_dest or self.overwrite:
                    image_name = os.path.basename(filename)
                    content = {
                        "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                        "imageName": image_name
                    }
                    if image_name in image_ids:
                        content['imageID'] = image_ids[image_name]

                    fileid = upload_to_dataset(connector, host, self.clowder_user,
                                               self.clowder_pass, target_dsid, out_file)
                    uploaded_file_ids.append(fileid)

                    # Generate our metadata
                    meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                    clowder_file.upload_metadata(connector, host, secret_key, fileid, meta)
                else:
                    self.logger.warn("Skipping existing file in dataset: %s", out_file)

                self.created += 1
                self.bytes += os.path.getsize(out_file)

            # Get the next shape to extract
            feature = layer.GetNextFeature()

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        id_len = len(uploaded_file_ids)
        if id_len > 0 or self.created > 0:
            extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                          {"files_created": uploaded_file_ids}, 'dataset')
            self.log_info(resource,
                          "Uploading shapefile plot extractor metadata to Level_2 dataset: " +
                          str(extractor_md))
            clowder_dataset.remove_metadata(connector, host, secret_key, resource['id'],
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, resource['id'],
                                            extractor_md)
        else:
            self.logger.warn("Skipping dataset metadata updating since no files were loaded")

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if sensor_old_base is not None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # clean tmp directory from any potential failed previous runs
    flist = os.listdir("/tmp")
    for f in flist:
        try:
            os.remove(os.path.join("/tmp", f))
        except:
            pass

    """ if file is above configured limit, skip it
    max_gb = 24  # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
    for fname in resource['local_paths']:
        if fname.endswith('raw'):
            rawfile = fname
    rawsize = os.stat(rawfile).st_size
    if rawsize > max_gb * 1000000000:
        self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
        return False
    """

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_rawname = 'SWIR'
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_rawname = 'VNIR'
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks',
                                                 opts=['soil_mask'])
    out_nc = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)
    xps_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['xps'])
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['ind'])
    csv_file = self.sensors.get_sensor_path(timestamp,
                                            sensor=sensor_fullname.replace("_netcdf", "_traits"))

    raw_file, terra_md_full = None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
        elif fname.endswith('raw'):
            raw_file = fname
    if None in [raw_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, sensor_rawname, terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    print_name = self.sensors.get_display_name(sensor=sensor_fullname)
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                  (season_name, experiment_name, print_name,
                   timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(
        host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
        season_name, experiment_name, print_name,
        timestamp[:4], timestamp[5:7], timestamp[8:10],
        leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    uploaded_file_ids = []

    # Perform actual processing
    if (not file_exists(out_nc)) or self.overwrite:
        """TODO: OLD AND NOT USED
        self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
        if soil_mask and file_exists(soil_mask):
            # If soil mask exists, we can generate an _ind indices file
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "-m", soil_mask, "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        else:
            # Otherwise we cannot, and need to trigger soilmask extractor and circle back later
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        if returncode != 0:
            raise ValueError('script encountered an error')
        """

        self.log_info(resource, 'invoking python calibration to create: %s' % out_nc)
        create_empty_netCDF(raw_file, out_nc)
        self.log_info(resource, 'applying calibration to: %s' % out_nc)
        apply_calibration(raw_file, out_nc)
        self.log_info(resource, 'done applying calibration to %s' % raw_file)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_nc,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_nc)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(out_nc)

        # TODO: Still compatible?
        #if not soil_mask:
        #    self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
        #    submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

        # TODO: Send output to BETYdb
        """
        # Send indices to betyDB
        if file_exists(ind_file):
            # TODO: Use ncks to trim ind_file to plots before this step
            plot_no = 'Full Field'

            with Dataset(ind_file, "r") as netCDF_handle:
                ndvi = netCDF_handle.get_variables_by_attributes(
                    standard_name='normalized_difference_chlorophyll_index_750_705')
                NDVI705 = ndvi[0].getValue().ravel()[0]

                # TODO: Map the remaining ~50 variables in BETY to create indices file
                # TODO: In netCDF header,
                csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                             'citation_author,citation_year,citation_title,method'
                csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                           'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                               timestamp, NDVI705, plot_no)
                with open(csv_file, 'w') as c:
                    c.write(csv_header + '\n' + csv_vals)

            # TODO: Send this CSV to betydb & geostreams extractors instead
            submit_traits(csv_file, bety_key=self.bety_key)
        """

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get BIN file and metadata
    bin_file, terra_md_full = None, None
    for f in resource['local_paths']:
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, 'flirIrCamera', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                  (season_name, experiment_name, self.sensors.get_display_name(),
                   timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(
        host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
        season_name, experiment_name, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10],
        leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    tiff_path = self.sensors.create_sensor_path(timestamp)
    png_path = tiff_path.replace(".tif", ".png")
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product
    self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    terra_md_trim = get_terraref_metadata(all_dsmd)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + \
        "datasets/" + resource['id']
    level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    skipped_png = False
    if not file_exists(png_path) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating & uploading %s" % png_path)
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(tiff_path) or self.overwrite:
        # Generate temperature matrix and perform actual processing
        self.log_info(resource, "creating & uploading %s" % tiff_path)
        gps_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, terra_md_full)  # get temperature
        create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid,
                                      {"files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']

    left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
        EI = getEnhancedImage(img_left)
        create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(left_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          left_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(right_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
        EI = getEnhancedImage(img_right)
        create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(right_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          right_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, right_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                  (season_name, experiment_name, self.sensors.get_display_name(),
                   timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid,
                                      {"files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except Exception:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
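# The bin2tif handler above, like the other stereo handlers in this file, repeats a
# create-if-needed / upload-if-absent step per camera side. A minimal sketch of that pattern as a
# standalone helper, under the assumption that check_file_in_dataset, upload_to_dataset and
# file_exists behave as used above; ensure_in_dataset itself is a hypothetical name.
def ensure_in_dataset(connector, host, clowder_user, clowder_pass, secret_key, dsid, path):
    """Upload path to the dataset only when it exists locally and is not already registered there.

    Returns the Clowder file ID of the new upload, or None when no upload was needed.
    """
    if not file_exists(path):
        return None                                   # nothing to upload
    if check_file_in_dataset(connector, host, secret_key, dsid, path):
        return None                                   # already present in the destination dataset
    return upload_to_dataset(connector, host, clowder_user, clowder_pass, dsid, path)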
def process_message(self, connector, host, secret_key, resource, parameters):
    """Process the message requesting the ODM extractor to run

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    # Start of message processing
    self.start_message(resource)
    TerrarefExtractor.process_message(self, connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Array of files to upload once processing is done
    self.files_to_upload = []
    # Our cache of files to upload
    self.cache_folder = tempfile.mkdtemp()

    # We are only handling one sensor type here. ODM generates additional sensor outputs
    # that may not be available for upload; we handle those as we see them in upload_file() above
    sensor_type = "rgb"

    # Initialize more local variables
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

    # Setup overrides and get the restore function
    restore_fn = self.setup_overrides(host, secret_key, resource)
    if not restore_fn:
        self.end_message(resource)
        return

    try:
        # Get the best timestamp
        timestamp = timestamp_to_terraref(self.find_timestamp(resource, resource['dataset_info']['name']))
        season_name, experiment_name, _ = self.get_season_and_experiment(timestamp, self.sensor_name)

        # Generate the file names
        out_tif_full = self.sensors.get_sensor_path(timestamp, opts=[sensor_type, scan_name]).replace(" ", "_")
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
        out_png = out_tif_medium.replace(".tif", ".png")
        out_dir = os.path.dirname(out_tif_full)

        # Generate dictionary of sensor output folders and file names
        sensor_maps = {sensor_type: {"dir": out_dir, "name": os.path.basename(out_tif_full)}}
        fsm = self.filename_sensor_maps
        for one_map in fsm:
            cur_sensor = fsm[one_map]
            if cur_sensor not in sensor_maps:
                sensor_path = self.sensors.get_sensor_path(timestamp, sensor=cur_sensor,
                                                           opts=[cur_sensor, scan_name]).replace(" ", "_")
                sensor_maps[cur_sensor] = {"dir": os.path.dirname(sensor_path),
                                           "name": os.path.basename(sensor_path)}
        self.sensor_maps = sensor_maps

        # Only generate what we need to by checking files on disk
        thumb_exists, med_exists, full_exists, png_exists, only_png = False, False, False, False, False
        if file_exists(out_tif_thumb):
            thumb_exists = True
        if file_exists(out_tif_medium):
            med_exists = True
        if file_exists(out_tif_full):
            full_exists = True
        if file_exists(out_png):
            png_exists = True
        if thumb_exists and med_exists and full_exists and not self.overwrite_ok:
            if png_exists:
                self.log_skip(resource, "all outputs already exist")
                return
            else:
                self.log_info(resource, "all outputs already exist (10% PNG thumbnail must"
                                        " still be generated)")
                only_png = True

        # If we need the whole set of files, create them
        if not only_png:
            # Override the output file name. We don't save anything here because we'll override
            # it the next time through
            self.args.orthophotoname = os.path.splitext(os.path.basename(out_tif_full))[0]

            # Run the stitch process
            OpenDroneMapStitch.process_message(self, connector, host, secret_key, resource, parameters)

            # Look up the name of the full sized orthomosaic
            basename = os.path.basename(out_tif_full)
            srcname = None
            for f in self.files_to_upload:
                if f["dest_name"] == basename:
                    srcname = os.path.join(self.cache_folder, f["source_name"])
                    break

            # Generate other file sizes from the original orthomosaic
            if srcname and not file_exists(out_tif_medium):
                self.log_info(resource, "Converting orthomosaic to %s..." % out_tif_medium)
                outname = os.path.join(self.cache_folder, os.path.basename(out_tif_medium))
                cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (10, 10, srcname, outname)
                subprocess.call(cmd, shell=True)
            if srcname and not file_exists(out_tif_thumb):
                self.log_info(resource, "Converting orthomosaic to %s..." % out_tif_thumb)
                outname = os.path.join(self.cache_folder, os.path.basename(out_tif_thumb))
                cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (2, 2, srcname, outname)
                subprocess.call(cmd, shell=True)

        # We're here due to possibly needing the PNG thumbnail
        srcname = os.path.join(self.cache_folder, os.path.basename(out_tif_medium))
        if (only_png or not png_exists) and file_exists(srcname):
            # Create PNG thumbnail
            self.log_info(resource, "Converting 10pct to %s..." % out_png)
            outname = os.path.join(self.cache_folder, os.path.basename(out_png))
            cmd = "gdal_translate -of PNG %s %s" % (srcname, outname)
            subprocess.call(cmd, shell=True)

        # Get dataset ID or create it, creating parent collections as needed
        leaf_ds_name = self.sensors.get_display_name() + ' - ' + timestamp
        ds_exists = get_datasetid_by_name(host, secret_key, leaf_ds_name)
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                    self.clowderspace, season_name, experiment_name,
                                                    self.sensors.get_display_name(),
                                                    timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                    leaf_ds_name=leaf_ds_name)

        if (self.overwrite_ok or not ds_exists) and self.experiment_metadata:
            self.update_dataset_extractor_metadata(connector, host, secret_key, target_dsid,
                                                   prepare_pipeline_metadata(self.experiment_metadata),
                                                   self.extractor_info['name'])

        # Store our dataset mappings for possible later use
        self.sensor_dsid_map = {sensor_type: target_dsid}

        # Upload full field image to Clowder
        file_ids = []
        if "files" in resource:
            for one_file in resource["files"]:
                file_ids.append(one_file.get("id", ""))
        content = {
            "comment": "This stitched file is computed using OpenDroneMap. Change the"
                       " parameters in extractors-opendronemap.txt to change the results.",
            "source_file_ids": ", ".join(file_ids)
        }

        # If we newly created these files, upload to Clowder
        file_name = os.path.basename(out_tif_thumb)
        file_path = os.path.join(self.cache_folder, file_name)
        if file_exists(file_path) and not thumb_exists:
            self.files_to_upload.append({"source_path": self.cache_folder, "source_name": file_name,
                                         "dest_path": out_dir, "dest_name": file_name, "compress": False})

        file_name = os.path.basename(out_tif_medium)
        file_path = os.path.join(self.cache_folder, file_name)
        if file_exists(file_path) and not med_exists:
            self.files_to_upload.append({"source_path": self.cache_folder, "source_name": file_name,
                                         "dest_path": out_dir, "dest_name": file_name, "compress": False})

        file_name = os.path.basename(out_png)
        file_path = os.path.join(self.cache_folder, file_name)
        if file_exists(file_path) and not png_exists:
            self.files_to_upload.append({"source_path": self.cache_folder, "source_name": file_name,
                                         "dest_path": out_dir, "dest_name": file_name, "compress": False})

        # The main orthomosaic is already getting uploaded, but we must make sure its path
        # is correct
        srcname = os.path.basename(out_tif_full).lower()
        for one_file in self.files_to_upload:
            file_name = os.path.basename(one_file["dest_name"]).lower()
            if file_name == srcname:
                one_file["dest_path"] = os.path.dirname(out_tif_full)
                break

        # This function uploads the files into their appropriate datasets
        self.perform_uploads(connector, host, secret_key, resource, target_dsid, content,
                             season_name, experiment_name, timestamp)

        # Clean up all destination folders, skipping over ones that are in our "base" path
        # (we want to keep those)
        base = self.sensors.base
        if not self.cache_folder.startswith(base):
            check_delete_folder(self.cache_folder)
        for sp in self.sensor_maps:
            if not self.sensor_maps[sp]["dir"].startswith(base):
                check_delete_folder(self.sensor_maps[sp]["dir"])
    finally:
        # We are done, restore fields we've modified (also be sure to restore fields in the
        # early returns in the code above)
        if restore_fn:
            restore_fn()
        self.end_message(resource)
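# The ODM handler above shells out to gdal_translate to derive the reduced-size products: a 10%
# "_10pct" GeoTIFF, a 2% "_thumb" GeoTIFF, and a PNG rendering of the 10% file. A minimal sketch of
# those two invocations as helpers, assuming gdal_translate is on PATH; downsample_geotiff and
# geotiff_to_png are hypothetical names, not part of the extractor.
import subprocess

def downsample_geotiff(src, dst, percent):
    """Write dst as a copy of src scaled to the given percentage of its original pixel dimensions."""
    subprocess.call(["gdal_translate", "-outsize", "%d%%" % percent, "%d%%" % percent, src, dst])

def geotiff_to_png(src, dst):
    """Render a GeoTIFF to PNG using GDAL's PNG driver."""
    subprocess.call(["gdal_translate", "-of", "PNG", src, dst])

# Usage mirroring the handler (hypothetical paths):
#   downsample_geotiff("fullfield.tif", "fullfield_10pct.tif", 10)
#   geotiff_to_png("fullfield_10pct.tif", "fullfield_10pct.png")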
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []
    right_ratio, left_ratio = 0, 0

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")

    if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_mask_tiff)
        #if qual_md and 'left_quality_score' in qual_md:
        #    left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
        left_ratio, left_rgb = gen_cc_enhanced(img_left)

        if left_ratio is not None and left_rgb is not None:
            # Bands must be reordered to avoid swapping R and B
            left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
            create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None, False,
                           self.extractor_info, terra_md_full)
            compress_geotiff(left_rgb_mask_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_mask_tiff)
        else:
            # If the masked version was not generated, delete any old version as well
            self.log_info(resource, "a faulty version exists; deleting %s" % left_rgb_mask_tiff)
            os.remove(left_rgb_mask_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_rgb_mask_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_rgb_mask_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:
            right_ratio, right_rgb = gen_cc_enhanced(img_right)

            if right_ratio is not None and right_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(right_rgb, right_bounds, right_rgb_mask_tiff, None, False,
                               self.extractor_info, terra_md_full)
                compress_geotiff(right_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(right_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old version as well
                self.log_info(resource, "a faulty version exists; deleting %s" % right_rgb_mask_tiff)
                os.remove(right_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, right_rgb_mask_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        md = {"files_created": uploaded_file_ids,
              "left_mask_ratio": left_ratio}
        if not self.leftonly:
            md["right_mask_ratio"] = right_ratio
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
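# gen_cc_enhanced() above appears to return an OpenCV-style array, and OpenCV stores channels in
# BGR order, so the handler reorders bands with cv2.cvtColor before create_geotiff writes them out
# as R, G, B. A minimal sketch of that reorder in isolation; the input file name is hypothetical.
import cv2

bgr = cv2.imread("example_left.tif")        # cv2.imread returns channels in BGR order
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # reorder so the GeoTIFF bands come out as R, G, B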
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    terra_md_full, spatial_meta = None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
            if 'spatial_metadata' in terra_md_full:
                spatial_meta = terra_md_full['spatial_metadata']
            else:
                spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }

        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            if f.endswith("_left.tif"):
                side = "left"
            else:
                side = "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }

        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }

        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
            if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))

            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                              (season_name, experiment_name, plot_display_name,
                               timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                            self.clowderspace, season_name, experiment_name,
                                                            plot_display_name,
                                                            timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                            leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    # If file is a geoTIFF, simply clip it and upload it to Clowder
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    # If file is LAS, we can merge with any existing scan+plot output safely
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check contents to see whether this file was already merged
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
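# The LAS branch above keeps a "_contents.txt" ledger beside each merged LAS file so the same
# source scan is never merged into a plot twice across re-runs. A minimal sketch of that lookup,
# assuming the same one-path-per-line format the handler writes; already_in_merge_ledger is a
# hypothetical name.
import os

def already_in_merge_ledger(merged_txt, file_path):
    """Return True when file_path is already listed in the merged-contents ledger."""
    if not os.path.exists(merged_txt):
        return False
    with open(merged_txt, 'r') as contents:
        return any(line.strip() == file_path for line in contents)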
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image quality")
    left_qual = getImageQuality(img_left)
    if not self.leftonly:
        right_qual = getImageQuality(img_right)

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_nrmac_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_nrmac_tiff)
        create_geotiff(np.array([[left_qual, left_qual], [left_qual, left_qual]]), left_bounds,
                       left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_nrmac_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        self.log_info(resource, "uploading %s" % left_nrmac_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_nrmac_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_nrmac_tiff)) or self.overwrite:
            self.log_info(resource, "creating %s" % right_nrmac_tiff)
            create_geotiff(np.array([[right_qual, right_qual], [right_qual, right_qual]]), right_bounds,
                           right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_nrmac_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % right_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, right_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    md = {"files_created": uploaded_file_ids,
          "left_quality_score": left_qual}
    if not self.leftonly:
        md["right_quality_score"] = right_qual
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
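# The NRMAC handler above stores each quality score as a tiny constant-valued GeoTIFF (a 2x2
# array of the same value, georeferenced to the camera footprint). A minimal note: the
# nested-list literal used above can also be written with numpy directly; left_qual here is a
# hypothetical score standing in for the value returned by getImageQuality().
import numpy as np

left_qual = 0.5
quality_raster = np.full((2, 2), left_qual)  # same as np.array([[left_qual, left_qual], [left_qual, left_qual]])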