def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether this dataset should be downloaded for processing.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when TERRA-REF metadata is present and this extractor's outputs
    are not already complete. Returns ``CheckMessage.ignore`` when:

    * the triggering file is not the latest file in the dataset,
    * the ``_left.bin`` / ``_right.bin`` inputs are not both present,
    * ``self.overwrite`` is false, metadata for this extractor version
      exists and both output files are already on disk, or
    * no TERRA-REF metadata is found (the dataset is then submitted to
      ``terra.metadata.cleaner``).
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Both the left and the right BIN file are required inputs.
    if not contains_required_files(resource, ['_left.bin', '_right.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(md):
        self.log_error(resource, "no terraref metadata found; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'],
                          "terra.metadata.cleaner")
        return CheckMessage.ignore

    if not self.overwrite and get_extractor_metadata(
            md, self.extractor_info['name'], self.extractor_info['version']):
        # Metadata alone is not trusted: make sure the outputs really exist.
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        if file_exists(left_tiff) and file_exists(right_tiff):
            self.log_skip(resource, "metadata v%s and outputs already exist"
                          % self.extractor_info['version'])
            # Previously this fell through to `download` right after logging
            # the skip; actually skip, as the log message says.
            return CheckMessage.ignore

    # Have TERRA-REF metadata, but no complete outputs from this extractor.
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset with at least 23 files should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when the dataset lists 23 or more files and the outputs of this
    extractor version are not both on disk. Returns ``CheckMessage.ignore``
    when the triggering file is not the latest, when fewer than 23 files are
    listed, or when extractor metadata and both output files already exist.
    """
    already_checked = "rulechecked" in parameters and parameters["rulechecked"]
    if already_checked:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Guard clause: wait until the dataset holds the expected number of files.
    if len(resource['files']) < 23:
        self.log_skip(resource, "found less than 23 files")
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    prior_run = get_extractor_metadata(dataset_md,
                                       self.extractor_info['name'],
                                       self.extractor_info['version'])
    if prior_run:
        # The CSV path is derived from the netCDF path by swapping the suffix.
        stamp = resource['name'].split(" - ")[1]
        netcdf_path = self.sensors.create_sensor_path(stamp)
        csv_path = netcdf_path.replace(".nc", "_geo.csv")
        if file_exists(netcdf_path) and file_exists(csv_path):
            message = ("metadata v%s and outputs already exist"
                       % self.extractor_info['version'])
            self.log_skip(resource, message)
            return CheckMessage.ignore

    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset containing an ``_ir.bin`` file should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when TERRA-REF metadata is present and the ``.tif`` / ``.png``
    outputs of this extractor version are not both on disk. Returns
    ``CheckMessage.ignore`` when the triggering file is not the latest, the
    ``_ir.bin`` input is missing, no TERRA-REF metadata is found, or the
    extractor metadata and both outputs already exist.
    """
    already_checked = "rulechecked" in parameters and parameters["rulechecked"]
    if already_checked:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # The _ir.bin file is the only required input.
    if not contains_required_files(resource, ['_ir.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(dataset_md):
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore

    prior_run = get_extractor_metadata(dataset_md,
                                       self.extractor_info['name'],
                                       self.extractor_info['version'])
    if prior_run:
        # Metadata exists; confirm both output files are really there.
        stamp = resource['dataset_info']['name'].split(" - ")[1]
        tif_path = self.sensors.get_sensor_path(stamp)
        png_path = tif_path.replace(".tif", ".png")
        if file_exists(png_path) and file_exists(tif_path):
            message = ("metadata v%s and outputs already exist"
                       % self.extractor_info['version'])
            self.log_skip(resource, message)
            return CheckMessage.ignore

    # TERRA-REF metadata is present but outputs from this extractor are not.
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a left/right TIF dataset should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when all checks below pass. Returns ``CheckMessage.ignore`` when:

    * the triggering file is not the latest file,
    * ``_left.tif`` / ``_right.tif`` are not both present,
    * no TERRA-REF metadata is found,
    * ``self.force`` is false and the ``terra.stereo-rgb.nrmac`` metadata has
      no ``left_quality_score`` or that score exceeds ``self.threshold``, or
    * metadata for this extractor version exists and both outputs are on
      disk and already listed in the dataset.
    """
    already_checked = "rulechecked" in parameters and parameters["rulechecked"]
    if already_checked:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Both the left and the right TIF file are required inputs.
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(dataset_md):
        self.log_error(resource, "no terraref metadata found")
        return CheckMessage.ignore

    if not self.force:
        # Gate on the NRMAC quality score unless processing is forced.
        nrmac_md = get_extractor_metadata(dataset_md, "terra.stereo-rgb.nrmac")
        if not nrmac_md or 'left_quality_score' not in nrmac_md:
            self.log_skip(resource, "NRMAC quality score not available")
            return CheckMessage.ignore
        left_score = float(nrmac_md['left_quality_score'])
        if left_score > self.threshold:
            self.log_skip(
                resource,
                "NRMAC quality score %s is above threshold of %s"
                % (left_score, self.threshold))
            return CheckMessage.ignore

    prior_run = get_extractor_metadata(dataset_md,
                                       self.extractor_info['name'],
                                       self.extractor_info['version'])
    if prior_run:
        # Metadata exists; confirm outputs are on disk and in the dataset.
        stamp = resource['dataset_info']['name'].split(" - ")[1]
        left_out = self.sensors.create_sensor_path(stamp, opts=['left'])
        right_out = self.sensors.create_sensor_path(stamp, opts=['right'])
        if file_exists(left_out) and file_exists(right_out):
            output_names = [os.path.basename(left_out),
                            os.path.basename(right_out)]
            if contains_required_files(resource, output_names):
                self.log_skip(
                    resource,
                    "metadata v%s and outputs already exist"
                    % self.extractor_info['version'])
                return CheckMessage.ignore
            self.log_info(resource, "output files exist but not yet uploaded")

    # TERRA-REF metadata is present but complete outputs are not.
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a hyperspectral dataset should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, when TERRA-REF metadata is present and the netCDF output is not
    already on disk, or when VNIR metadata could be recovered from a
    ``*_metadata.json`` file in the raw-data directory and re-uploaded.
    Returns ``CheckMessage.ignore`` when the triggering file is not the
    latest, when required input files are missing, when the output already
    exists, or when no TERRA-REF metadata is found and none can be recovered.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(
            resource,
            ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # The sensor is chosen from the dataset name: SWIR if named, else VNIR.
    dataset_name = resource['dataset_info']['name']
    if "SWIR" in dataset_name:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'
    timestamp = dataset_name.split(" - ")[1]

    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Metadata exists; confirm the output file is really there.
            out_nc = self.sensors.get_sensor_path(timestamp,
                                                  sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(
                    resource,
                    "metadata v%s and outputs already exist"
                    % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but no output from this extractor.
        return CheckMessage.download

    # The skip is logged before recovery is attempted (unchanged behaviour).
    self.log_skip(resource, "no terraref metadata found")

    # See if the metadata can be recovered from disk (VNIR only).
    if sensor_fullname == 'vnir_netcdf':
        date = timestamp.split("__")[0]
        source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
            date, timestamp)
        # os.listdir raises if the directory is absent; treat a missing
        # directory like a directory without a metadata file.
        if os.path.isdir(source_dir):
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info,
                                                 resource['id'], clean_md,
                                                 'dataset')
                    # Replace the dataset's metadata with the recovered copy.
                    remove_metadata(connector, host, secret_key,
                                    resource['id'])
                    upload_metadata(connector, host, secret_key,
                                    resource['id'], complete_md)
                    return CheckMessage.download

    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a left/right BIN dataset should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when both BIN inputs are present, outputs are not already done
    (or ``self.overwrite`` is set) and TERRA-REF metadata is present.
    Returns ``CheckMessage.ignore`` when the triggering file is not the
    latest, an input is missing, outputs or extractor metadata already exist
    while ``self.overwrite`` is false, or no TERRA-REF metadata is found.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found.
    filenames = [f['filename'] for f in resource['files'] if 'filename' in f]
    found_left = any(name.endswith('_left.bin') for name in filenames)
    found_right = any(name.endswith('_right.bin') for name in filenames)
    if not (found_left and found_right):
        self.log_skip(
            resource,
            "found left: %s, right: %s" % (found_left, found_right))
        return CheckMessage.ignore

    # Check if outputs already exist unless overwrite is forced.
    if not self.overwrite:
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        lbase = self.sensors.get_sensor_path(timestamp, opts=['left'], ext='')
        rbase = self.sensors.get_sensor_path(timestamp, opts=['right'], ext='')
        out_dir = os.path.dirname(lbase)
        if os.path.isfile(lbase + 'tif') and os.path.isfile(rbase + 'tif'):
            self.log_skip(resource, "outputs found in %s" % out_dir)
            return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(
            md, self.extractor_info['name']) and not self.overwrite:
        # log_skip takes the resource first, as in every other call here;
        # the original omitted it.
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore

    if get_terraref_metadata(md):
        return CheckMessage.download

    self.log_skip(resource, "no terraref metadata found")
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset with at least 23 input files should be processed.

    Returns ``CheckMessage.download`` when ``get_all_files(resource)`` yields
    23 or more entries and no metadata for this extractor version exists.
    Returns ``CheckMessage.ignore`` when the triggering file is not the
    latest, fewer than 23 files are available, or the metadata already exists.
    """
    if not is_latest_file(resource):
        return CheckMessage.ignore

    # Guard clause: wait until all expected input files are present.
    if len(get_all_files(resource)) < 23:
        self.log_skip(resource, 'not all input files are ready')
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    prior_run = get_extractor_metadata(dataset_md,
                                       self.extractor_info['name'],
                                       self.extractor_info['version'])
    if not prior_run:
        return CheckMessage.download

    message = "metadata v%s already exists" % self.extractor_info['version']
    self.log_skip(resource, message)
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset of 102 numbered BIN files should be processed.

    Returns ``CheckMessage.download`` only when BIN files with suffixes
    ``0000.bin`` through ``0101.bin`` are all present, outputs are not
    already complete, no metadata from this extractor blocks the run, and
    TERRA-REF metadata is present. Returns ``CheckMessage.ignore`` otherwise.
    """
    dataset_files = resource['files']

    # Fewer than 102 files cannot contain the full 0000-0101 series.
    if len(dataset_files) < 102:
        return CheckMessage.ignore

    if not is_latest_file(resource):
        return CheckMessage.ignore

    stamp = resource['dataset_info']['name'].split(" - ")[1]
    histogram_path = self.sensors.get_sensor_path(stamp,
                                                  opts=['combined_hist'])
    pseudocolor_path = self.sensors.get_sensor_path(
        stamp, opts=['combined_pseudocolored'])

    # Tally input BIN files and, for each one found, existing PNG outputs.
    bins_found = 0
    outputs_found = 0
    for index in range(102):
        padded = str(index).zfill(4)  # e.g. 1 becomes 0001
        bin_suffix = padded + '.bin'
        if not any(entry['filename'].endswith(bin_suffix)
                   for entry in dataset_files):
            continue
        bins_found += 1
        png_path = self.sensors.get_sensor_path(stamp, opts=[padded])
        if os.path.exists(png_path) and not self.overwrite:
            outputs_found += 1

    # Every per-frame output plus both combined outputs exist: nothing to do.
    if (outputs_found == 102 and os.path.exists(histogram_path)
            and os.path.exists(pseudocolor_path)):
        logging.info("skipping dataset %s, outputs already exist"
                     % resource['id'])
        return CheckMessage.ignore

    # Too few input BIN files.
    if bins_found < 102:
        return CheckMessage.ignore

    dataset_md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(
            dataset_md, self.extractor_info['name']) and not self.overwrite:
        logging.info("skipping dataset %s, found existing metadata"
                     % resource['id'])
        return CheckMessage.ignore

    if get_terraref_metadata(dataset_md):
        return CheckMessage.download
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset containing an ``_ir.bin`` file should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when an ``_ir.bin`` file is present, outputs are not already on
    disk (or ``self.overwrite`` is set) and either a ``_metadata.json`` file
    is in the dataset or TERRA-REF metadata is attached to it. Returns
    ``CheckMessage.ignore`` otherwise.
    """
    # Membership is tested first, as in the sibling extractors, so a missing
    # "rulechecked" key does not raise KeyError.
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        return CheckMessage.ignore

    # Look for an ir.BIN file and a metadata file among the dataset files.
    found_ir = None
    found_md = None
    for f in resource['files']:
        if 'filename' not in f:
            continue
        if f['filename'].endswith('_ir.bin'):
            found_ir = f['filepath']
        elif f['filename'].endswith('_metadata.json'):
            found_md = f['filepath']

    if not found_ir:
        return CheckMessage.ignore

    # Check if outputs already exist.
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    png_path = self.sensors.get_sensor_path(timestamp, ext='png')
    tiff_path = self.sensors.get_sensor_path(timestamp)
    if (os.path.exists(png_path) and os.path.exists(tiff_path)
            and not self.overwrite):
        logging.getLogger(__name__).info(
            "skipping dataset %s, outputs already exist" % resource['id'])
        return CheckMessage.ignore

    if found_md:
        return CheckMessage.download

    # No _metadata.json file: fall back to metadata attached to the dataset.
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(
            md, self.extractor_info['name']) and not self.overwrite:
        logging.getLogger(__name__).info(
            "skipping dataset %s, already processed" % resource['id'])
        return CheckMessage.ignore
    if get_terraref_metadata(md):
        return CheckMessage.download
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a left/right TIF dataset should be processed.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when the TIF inputs and TERRA-REF metadata are present and the
    outputs of this extractor version are not already on disk and uploaded.
    Returns ``CheckMessage.ignore`` otherwise. When the TIF inputs are
    missing and the TERRA-REF metadata names a ``raw_data_source``, that raw
    dataset is submitted to ``terra.stereo-rgb.bin2tif`` before ignoring.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Both the left and the right TIF file are required inputs.
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        # Check for raw_data_source in metadata and resubmit to bin2tif.
        md = download_metadata(connector, host, secret_key, resource['id'])
        terra_md = get_terraref_metadata(md)
        # terra_md may be falsy when no TERRA-REF metadata exists (the code
        # below handles that case too), so guard before the membership test.
        if terra_md and 'raw_data_source' in terra_md:
            raw_id = str(terra_md['raw_data_source'].split("/")[-1])
            self.log_info(resource,
                          "submitting raw source %s to bin2tif" % raw_id)
            submit_extraction(connector, host, secret_key, raw_id,
                              "terra.stereo-rgb.bin2tif")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(md):
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore

    if get_extractor_metadata(md, self.extractor_info['name'],
                              self.extractor_info['version']):
        # Make sure outputs properly exist.
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        left_nrmac_tiff = self.sensors.create_sensor_path(timestamp,
                                                          opts=['left'])
        right_nrmac_tiff = self.sensors.create_sensor_path(timestamp,
                                                           opts=['right'])
        # In left-only mode only the left output has to exist.
        if (self.leftonly and file_exists(left_nrmac_tiff)) or (
                not self.leftonly and file_exists(left_nrmac_tiff)
                and file_exists(right_nrmac_tiff)):
            if contains_required_files(
                    resource, [os.path.basename(left_nrmac_tiff)]):
                self.log_skip(resource,
                              "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
            self.log_info(resource, "output file exists but not yet uploaded")

    # Have TERRA-REF metadata, but no complete outputs from this extractor.
    return CheckMessage.download
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a dataset holds the full series of 102 BIN files.

    Returns ``CheckMessage.download`` when, for every index 0 through 101,
    some file name ends with the zero-padded index followed by ``.bin``.
    Returns ``CheckMessage.ignore`` when fewer than 102 files are listed,
    the triggering file is not the latest, or any index is missing.
    """
    dataset_files = resource['files']

    # Fewer than 102 files cannot contain the full 0000-0101 series.
    if len(dataset_files) < 102:
        return CheckMessage.ignore

    if not is_latest_file(resource):
        return CheckMessage.ignore

    # Count the indices that have at least one matching BIN file.
    bins_found = 0
    for index in range(102):
        bin_suffix = str(index).zfill(4) + '.bin'  # e.g. 1 becomes 0001.bin
        if any(entry['filename'].endswith(bin_suffix)
               for entry in dataset_files):
            bins_found += 1

    if bins_found >= 102:
        return CheckMessage.download
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    """Decide whether a TIF dataset should be processed into mask outputs.

    Returns ``CheckMessage.download`` when ``parameters["rulechecked"]`` is
    set, or when the required inputs are present and the outputs of this
    extractor version are not already on disk. Returns
    ``CheckMessage.ignore`` when the triggering file is not the latest,
    required inputs are missing, or metadata and outputs already exist.

    With TERRA-REF metadata the required inputs are ``_left.tif`` and,
    unless ``self.leftonly`` is set, ``_right.tif``. Without it, a ``.tif``
    entry is required.
    """
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need.
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        # Check for a left and right TIF file - skip if not found.
        # In left-only mode the right file is not required.
        needed_files = ['_left.tif']
        if not self.leftonly:
            needed_files.append('_right.tif')
        if not contains_required_files(resource, needed_files):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist.
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
            right_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
            # Skip only when the outputs really exist. The original tested
            # `not (left and right)` here, which skipped datasets whose
            # outputs were missing and re-ran those that were complete.
            if (self.leftonly and file_exists(left_mask_tiff)) or (
                    not self.leftonly and file_exists(left_mask_tiff)
                    and file_exists(right_mask_tiff)):
                self.log_skip(resource,
                              "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
    # NOTE(review): this branch is assumed to pair with the TERRA-REF
    # metadata check above (no metadata -> look for other images to mask);
    # confirm against the original file's indentation.
    elif not contains_required_files(resource, ['.tif']):
        self.log_skip(resource, "missing required tiff file")
        return CheckMessage.ignore

    # Inputs are present and outputs from this extractor are not complete.
    return CheckMessage.download