def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a stereo left/right geotiff pair should be enhanced.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # Upstream rule checker already vetted this message - accept immediately.
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    self.start_check(resource)
    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore
    # Check for a left and right BIN file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore
    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.force:
            # Check NRMAC score > 15 before proceeding if available
            nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
            if not (nrmac_md and 'left_quality_score' in nrmac_md):
                self.log_skip(resource, "NRMAC quality score not available")
                return CheckMessage.ignore
            # Skip when the score exceeds the configured threshold
            # (presumably higher NRMAC = worse capture quality - confirm).
            elif float(nrmac_md['left_quality_score']) > self.threshold:
                self.log_skip(
                    resource,
                    "NRMAC quality score %s is above threshold of %s" % (float(
                        nrmac_md['left_quality_score']), self.threshold))
                return CheckMessage.ignore
        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_enh_tiff = self.sensors.create_sensor_path(
                timestamp, opts=['right'])
            if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                if contains_required_files(resource, [
                        os.path.basename(left_enh_tiff),
                        os.path.basename(right_enh_tiff)
                ]):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    # Outputs on disk but not attached to the dataset yet -
                    # reprocess so they get uploaded.
                    self.log_info(
                        resource, "output files exist but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_error(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a stereo left/right bin pair should be converted to geotiff.

    FIX: when this extractor version already ran AND both output tiffs exist,
    the original logged the skip but then fell through and returned
    CheckMessage.download anyway; it now returns CheckMessage.ignore, matching
    the sibling check_message handlers in this file.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    if not contains_required_files(resource, ['_left.bin', '_right.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.overwrite and get_extractor_metadata(
                md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if file_exists(left_tiff) and file_exists(right_tiff):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        # No TERRA-REF metadata attached: hand the dataset to the cleaner.
        self.log_error(resource, "no terraref metadata found; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'],
                          "terra.metadata.cleaner")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a FLIR _ir.bin capture should be downloaded for processing.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # The rule checker already approved this message - no further vetting.
    if parameters.get("rulechecked"):
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # An _ir.bin capture must be present before any processing can happen.
    if not contains_required_files(resource, ['_ir.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Fetch dataset metadata; TERRA-REF metadata is required to proceed.
    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(dataset_md):
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore

    # If this extractor version already ran, skip only when both outputs exist.
    if get_extractor_metadata(dataset_md, self.extractor_info['name'],
                              self.extractor_info['version']):
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        tif_out = self.sensors.get_sensor_path(timestamp)
        png_out = tif_out.replace(".tif", ".png")
        if file_exists(png_out) and file_exists(tif_out):
            self.log_skip(resource, "metadata v%s and outputs already exist" %
                          self.extractor_info['version'])
            return CheckMessage.ignore

    # TERRA-REF metadata present but outputs incomplete - process it.
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a full-day dataset should be assembled into netCDF/CSV.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # The rule checker already approved this message - no further vetting.
    if parameters.get("rulechecked"):
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # A full complement of files is expected before assembly can start.
    if len(resource['files']) < 23:
        self.log_skip(resource, "found less than 23 files")
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(dataset_md, self.extractor_info['name'],
                              self.extractor_info['version']):
        # Already ran at this version - skip only if both outputs exist.
        timestamp = resource['name'].split(" - ")[1]
        netcdf_out = self.sensors.create_sensor_path(timestamp)
        csv_out = netcdf_out.replace(".nc", "_geo.csv")
        if file_exists(netcdf_out) and file_exists(csv_out):
            self.log_skip(resource, "metadata v%s and outputs already exist" %
                          self.extractor_info['version'])
            return CheckMessage.ignore
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether an NDVI/PRI parent dataset should be downloaded.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # First, check if we have the correct sensor type.
    parent_id = resource['parent']['id']
    parent_md = download_metadata(connector, host, secret_key, parent_id)
    ds_info = get_info(connector, host, secret_key, parent_id)
    sensor_kind = self.determineSensorType(ds_info['name'])
    if sensor_kind not in ("ndvi", "pri"):
        return CheckMessage.ignore

    # Skip if this extractor already recorded metadata (unless overwriting).
    if get_extractor_metadata(parent_md, self.extractor_info['name']) and not self.overwrite:
        logging.info("skipping dataset %s, already processed" % resource['id'])
        return CheckMessage.ignore

    # Skip if the extracted-values output file already exists on disk.
    timestamp = ds_info['name'].split(" - ")[1]
    out_file = self.get_sensor_path(timestamp, opts=['extracted_values'])
    if os.path.isfile(out_file) and not self.overwrite:
        logging.info("skipping %s, outputs already exist" % resource['id'])
        return CheckMessage.ignore

    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Accept the resource unless this extractor already processed it.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    self.start_check(resource)
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    previously_done = get_extractor_metadata(ds_md, self.extractor_info['name'])
    if previously_done and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a hyperspectral raw dataset should be downloaded.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    May also repair missing dataset metadata from raw files on disk (VNIR only).
    """
    # Upstream rule checker already vetted this message - accept immediately.
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore
    # The full capture bundle must be present before processing can start.
    if not contains_required_files(
            resource,
            ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore
    # Route to the SWIR or VNIR sensor product based on the dataset name.
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp,
                                                  sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(
                    resource, "metadata v%s and outputs already exist" %
                    self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            # NOTE(review): hard-coded site mount path - assumes raw VNIR data
            # is available at this location; confirm for other deployments.
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
                date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info,
                                                 resource['id'], clean_md,
                                                 'dataset')
                    # Replace the dataset's metadata with the cleaned copy and
                    # accept the message for processing.
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key,
                                    resource['id'], complete_md)
                    return CheckMessage.download
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a fullfield IR geotiff should be processed.

    FIX: the regex was a non-raw string containing "\\d", an invalid escape
    sequence (DeprecationWarning since Python 3.6, SyntaxWarning in newer
    releases). Converted to a raw string; the pattern bytes are unchanged.
    NOTE(review): the dot in ".tif" is unescaped and matches any character -
    likely intended as "\\.tif", but left as-is to preserve behavior.
    """
    if resource['name'].find('fullfield') > -1 and re.match(r"^.*\d+_ir_.*.tif", resource['name']):
        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
            self.log_skip(resource, "metadata indicates it was already processed")
            return CheckMessage.ignore
        return CheckMessage.download
    else:
        self.log_skip(resource, "regex not matched for %s" % resource['name'])
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a stereo left/right bin pair should be downloaded.

    FIX: two self.log_skip() calls omitted the leading `resource` argument
    (the message string was passed where the resource belongs); both now
    match the log_skip(resource, message) signature used at every other
    call site in this file.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    found_left = False
    found_right = False
    for f in resource['files']:
        if 'filename' in f:
            if f['filename'].endswith('_left.bin'):
                found_left = True
            elif f['filename'].endswith('_right.bin'):
                found_right = True
    if not (found_left and found_right):
        self.log_skip(resource,
                      "found left: %s, right: %s" % (found_left, found_right))
        return CheckMessage.ignore

    # Check if outputs already exist unless overwrite is forced - skip if found
    if not self.overwrite:
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        lbase = self.sensors.get_sensor_path(timestamp, opts=['left'], ext='')
        rbase = self.sensors.get_sensor_path(timestamp, opts=['right'], ext='')
        out_dir = os.path.dirname(lbase)
        # NOTE(review): paths are built as lbase + 'tif' - this assumes ext=''
        # leaves a trailing "." on the base path; confirm against
        # get_sensor_path's contract.
        if (os.path.isfile(lbase + 'tif') and os.path.isfile(rbase + 'tif')):
            self.log_skip(resource, "outputs found in %s" % out_dir)
            return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    if get_terraref_metadata(md):
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset with the full set of inputs should be processed.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    if not is_latest_file(resource):
        return CheckMessage.ignore

    # The complete set of expected input files (23) must be present.
    if len(get_all_files(resource)) < 23:
        self.log_skip(resource, 'not all input files are ready')
        return CheckMessage.ignore

    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(ds_md, self.extractor_info['name'],
                              self.extractor_info['version']):
        self.log_skip(resource, "metadata v%s already exists" %
                      self.extractor_info['version'])
        return CheckMessage.ignore
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a fullfield RGB thumbnail mosaic should be processed.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    name = resource['name']
    # Only fullfield RGB thumbnail geotiffs are of interest here.
    if 'fullfield' not in name or not re.match(r"^.*\d+_rgb_.*thumb.tif", name):
        return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    parent_md = download_metadata(connector, host, secret_key,
                                  resource['parent']['id'])
    if get_extractor_metadata(parent_md, self.extractor_info['name']) and not self.overwrite:
        logging.info(
            "skipping dataset %s; metadata indicates it was already processed"
            % resource['id'])
        return CheckMessage.ignore
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether an _ir.bin dataset should be downloaded for processing.

    FIX: the original indexed `parameters["rulechecked"]` directly, raising
    KeyError for messages without that key; it now guards with a membership
    test, consistent with every other check_message handler in this file.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    if not is_latest_file(resource):
        return CheckMessage.ignore

    # Check for an ir.BIN file and metadata before beginning processing
    found_ir = None
    found_md = None
    for f in resource['files']:
        if 'filename' in f and f['filename'].endswith('_ir.bin'):
            found_ir = f['filepath']
        elif 'filename' in f and f['filename'].endswith('_metadata.json'):
            found_md = f['filepath']

    if found_ir:
        # Skip when both outputs already exist and overwriting is disabled.
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        png_path = self.sensors.get_sensor_path(timestamp, ext='png')
        tiff_path = self.sensors.get_sensor_path(timestamp)
        if os.path.exists(png_path) and os.path.exists(tiff_path) and not self.overwrite:
            logging.getLogger(__name__).info(
                "skipping dataset %s, outputs already exist" % resource['id'])
            return CheckMessage.ignore

        # If we don't find _metadata.json file, check if we have metadata
        # attached to dataset instead
        if not found_md:
            md = download_metadata(connector, host, secret_key, resource['id'])
            if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
                logging.getLogger(__name__).info(
                    "skipping dataset %s, already processed" % resource['id'])
                return CheckMessage.ignore
            if get_terraref_metadata(md):
                return CheckMessage.download
            return CheckMessage.ignore
        else:
            # A _metadata.json sidecar is present - process with it.
            return CheckMessage.download
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a 102-frame bin dataset should be downloaded.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # Check for 0000-0101 bin files before beginning processing
    if len(resource['files']) < 102:
        self.log_skip(resource, "less than 102 files found")
        return CheckMessage.ignore
    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.get_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.get_sensor_path(
        timestamp, opts=['combined_pseudocolored'])

    # Count number of bin files in dataset, as well as number of existing outputs
    ind_add = 0     # how many of the expected 0000..0101 .bin inputs are present
    ind_output = 0  # how many corresponding outputs already exist on disk
    for ind in range(0, 102):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['files']:
            if f['filename'].endswith(format_ind + '.bin'):
                ind_add += 1
                out_png = self.sensors.get_sensor_path(timestamp,
                                                       opts=[format_ind])
                if os.path.exists(out_png) and not self.overwrite:
                    ind_output += 1
                # Found this frame's bin file - no need to scan further.
                break

    # Do the outputs already exist?
    if ind_output == 102 and os.path.exists(hist_path) and os.path.exists(coloredImg_path):
        self.log_skip(resource, "outputs already exist")
        return CheckMessage.ignore
    # Do we have too few input BIN files?
    if ind_add < 102:
        self.log_skip(resource, "less than 102 .bin files found")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    if get_terraref_metadata(md):
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a hyperspectral dataset should be downloaded (legacy path).

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    #if not is_latest_file(resource):
    #    return CheckMessage.ignore

    # Adjust sensor path based on VNIR vs SWIR
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'
    if has_all_files(resource):
        # Check if output already exists
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        outFilePath = self.sensors.get_sensor_path(timestamp,
                                                   sensor=sensor_fullname)
        if os.path.exists(outFilePath) and not self.overwrite:
            logging.getLogger(__name__).info(
                'skipping dataset %s, output file already exists' %
                resource['id'])
            return CheckMessage.ignore
        else:
            # Check if we have necessary metadata, either as a .json file or
            # attached to dataset
            md = download_metadata(connector, host, secret_key,
                                   resource['id'],
                                   self.extractor_info['name'])
            if get_extractor_metadata(
                    md, self.extractor_info['name']) and not self.overwrite:
                logging.getLogger(__name__).info(
                    "skipping dataset %s, already processed" % resource['id'])
                return CheckMessage.ignore
            elif get_terraref_metadata(md):
                return CheckMessage.download
            else:
                # No attached metadata - accept only if a metadata.json file
                # is included in the dataset's file list.
                for f in resource['files']:
                    if f['filename'] == 'metadata.json':
                        return CheckMessage.download
                return CheckMessage.ignore
    else:
        logging.getLogger(__name__).info(
            'skipping dataset %s, not all input files are ready' %
            resource['id'])
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether an rgb_fullfield mask geotiff should be processed.

    FIX: the filename test was `startswith('rgb_fullfield') > -1`, which is
    always true (both True and False compare greater than -1), so the prefix
    check never filtered anything. The boolean result of startswith() is now
    used directly.
    """
    self.start_check(resource)
    if resource['name'].startswith('rgb_fullfield') and resource['name'].endswith('_mask.tif'):
        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(
                md, self.extractor_info['name'],
                self.extractor_info['version']) and not self.overwrite:
            self.log_skip(resource,
                          "metadata indicates it was already processed")
            return CheckMessage.ignore
        return CheckMessage.download
    else:
        self.log_skip(resource,
                      "expected filename mismatch: %s" % resource['name'])
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a left/right stereo geotiff should be processed.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    """
    # The rule checker already approved this message - no further vetting.
    if parameters.get("rulechecked"):
        return CheckMessage.download
    self.start_check(resource)

    # Only left/right stereo geotiffs are handled by this extractor.
    if not resource['name'].endswith(('_left.tif', '_right.tif')):
        self.log_skip(resource, "not left/right geotiff")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(ds_md, self.extractor_info['name']) and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether stereo geotiffs should be downloaded for NRMAC scoring.

    Returns CheckMessage.download to proceed, CheckMessage.ignore to skip.
    May resubmit the raw source dataset to bin2tif when the tiffs are missing.
    """
    # Upstream rule checker already vetted this message - accept immediately.
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        # Check for raw_data_source in metadata and resubmit to bin2tif if available...
        md = download_metadata(connector, host, secret_key, resource['id'])
        terra_md = get_terraref_metadata(md)
        # NOTE(review): get_terraref_metadata may return None/empty here; the
        # 'in' test below would raise TypeError on None - confirm its contract.
        if 'raw_data_source' in terra_md:
            raw_id = str(terra_md['raw_data_source'].split("/")[-1])
            self.log_info(resource,
                          "submitting raw source %s to bin2tif" % raw_id)
            submit_extraction(connector, host, secret_key, raw_id,
                              "terra.stereo-rgb.bin2tif")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_nrmac_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['left'])
            right_nrmac_tiff = self.sensors.create_sensor_path(timestamp,
                                                               opts=['right'])
            # Left-only mode requires just the left output; otherwise both.
            if (self.leftonly and file_exists(left_nrmac_tiff)) or (
                    not self.leftonly and file_exists(left_nrmac_tiff)
                    and file_exists(right_nrmac_tiff)):
                if contains_required_files(
                        resource, [os.path.basename(left_nrmac_tiff)]):
                    self.log_skip(resource,
                                  "metadata v%s and outputs already exist" %
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    # Output on disk but not yet attached to the dataset.
                    self.log_info(resource,
                                  "output file exists but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether stereo geotiffs should be downloaded for mask generation.

    FIXES:
    - The "outputs already exist" test's second clause was
      `not (file_exists(left) and file_exists(right))`, which skipped the
      dataset when the outputs were MISSING rather than present. It now
      mirrors the sibling NRMAC handler: left-only mode requires the left
      mask, otherwise both masks.
    - The no-TERRA-REF-metadata path previously fell off the end of the
      function (implicitly returning None); it now logs and returns
      CheckMessage.ignore explicitly.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download
    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        # Check for a left and right TIF file - skip if not found
        # If we're only processing the left files, don't check for the right file
        needed_files = ['_left.tif']
        if not self.leftonly:
            needed_files.append('_right.tif')
        if not contains_required_files(resource, needed_files):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
            right_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
            if (self.leftonly and file_exists(left_mask_tiff)) or \
                    (not self.leftonly and file_exists(left_mask_tiff) and
                     file_exists(right_mask_tiff)):
                self.log_skip(resource,
                              "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Check for other images to create a mask on
        elif not contains_required_files(resource, ['.tif']):
            self.log_skip(resource, "missing required tiff file")
            return CheckMessage.ignore

        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether sensor-position metadata should be handled.

    Returns CheckMessage.bypass to process metadata without downloading files,
    otherwise CheckMessage.ignore.
    """
    # Position data only accompanies dataset-level metadata messages.
    if resource['type'] != "dataset":
        resource.setdefault('name', resource["type"])
        self.log_skip(resource, "position is only logged for dataset metadata")
        return CheckMessage.ignore
    self.start_check(resource)

    if 'spatial_metadata' not in resource['metadata']:
        self.log_skip(resource, "newly added metadata is not from LemnaTec")
        return CheckMessage.ignore

    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(ds_md, self.extractor_info['name']):
        self.log_skip(resource, "sensorposition metadata already exists")
        return CheckMessage.ignore

    # Nothing recorded by this extractor yet - handle the metadata without
    # downloading the dataset's files.
    return CheckMessage.bypass
def process_message(self, connector, host, secret_key, resource, parameters):
    """Convert raw stereo left/right .bin captures into georeferenced GeoTIFFs.

    Locates the left/right bin files and TERRA-REF metadata among the
    downloaded local paths, demosaics each image, writes GeoTIFFs into the
    Level_1 dataset hierarchy, uploads files not already in the target
    dataset, triggers downstream extractors, and records extractor metadata.

    Raises:
        ValueError: if required inputs or season/experiment cannot be determined.

    FIX: the final metadata upload used a bare `except:`, which also swallowed
    SystemExit/KeyboardInterrupt; it now catches Exception only.
    """
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
        season_name, experiment_name, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(
        host, secret_key, self.clowder_user, self.clowder_pass,
        self.clowderspace, season_name, experiment_name,
        self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10],
        leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource,
                      "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid,
                        self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + (
            "" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid,
                                   terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    # Pull image shapes and GPS bounds; malformed spatial metadata goes to the
    # metadata cleaner instead of failing the extraction outright.
    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource,
                       "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'],
                          "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                          target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                 "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                          target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                 "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid,
                      "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid,
                      "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid,
                      "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'],
                        self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'],
                            extractor_md)
        except Exception:
            # Best-effort: metadata upload failure should not fail the message.
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)