def upload_ds(conn, host, key, sensor, date, timestamp, ds_files, ds_meta):
    if len(ds_files) > 0:
        year, month, dd = date.split("-")

        if DRY_RUN:
            log("[%s] %s files" % (sensor + ' - ' + timestamp, len(ds_files)))
            return

        if TIMESTAMP_FOLDER:
            dataset_id = build_dataset_hierarchy(CLOWDER_HOST, CLOWDER_KEY, CLOWDER_USER, CLOWDER_PASS, SPACE_ID,
                                                 sensor, year, month, dd, sensor + ' - ' + timestamp)
        else:
            dataset_id = build_dataset_hierarchy(CLOWDER_HOST, CLOWDER_KEY, CLOWDER_USER, CLOWDER_PASS, SPACE_ID,
                                                 sensor, year, month, leaf_ds_name=sensor + ' - ' + date)

        log("adding files to Clowder dataset %s" % dataset_id)
        for FILEPATH in ds_files:
            upload_to_dataset(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id, FILEPATH)

        if len(ds_meta.keys()) > 0:
            log("adding metadata to Clowder dataset %s" % dataset_id)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": ds_meta,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/58e2a7b9fe3ae3efc1632ae8"
                }
            }
            upload_metadata(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id, format_md)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    inPath = resource['local_paths'][0]

    # Determine output file path
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    out_file = self.create_sensor_path(timestamp, opts=['extracted_values'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    # Extract NDVI values
    if not os.path.isfile(out_file) or self.overwrite:
        logging.info("...writing values to: %s" % out_file)
        # Read the raw value directly from the binary file; the context manager closes the handle
        with open(inPath, "rb") as bin_data:
            values = float(bin_data.read()[49:66])

        with open(out_file, 'wb') as csvfile:
            fields = ['file_name', 'NDVI']  # field names for the CSV file
            wr = csv.DictWriter(csvfile, fieldnames=fields, lineterminator='\n')
            wr.writeheader()
            wr.writerow({'file_name': resource['name'], 'NDVI': values})
        # TODO: Send this to geostreams

        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
        uploaded_file_ids.append(fileid)

        self.created += 1
        self.bytes += os.path.getsize(out_file)
    else:
        logging.info("%s already exists; skipping %s" % (out_file, resource['id']))

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    for p in resource['local_paths']:
        if p.endswith(".bin"):
            input_dir = p.replace(os.path.basename(p), '')
            # TODO: Eventually light may be in separate location
            input_dir_light = input_dir

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    out_name_base = self.sensors.create_sensor_path(timestamp, ext='')
    uploaded_file_ids = []

    subprocess.call(["octave --eval \"PSII('%s','%s','%s')\"" % (input_dir, input_dir_light, out_name_base)],
                    shell=True)

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    for out_file in ["_Fm_dark", "_Fv_dark", "_FvFm_dark", "_Fm_light", "_Fv_light", "_FvFm_light",
                     "_Phi_PSII", "_NPQ", "_qN", "_qP", "_Rfd"]:
        full_out_name = out_name_base + out_file + ".png"
        if os.path.isfile(full_out_name) and full_out_name not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, full_out_name)
            uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(full_out_name)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Find input files in dataset
    target_files = {
        'raw': None,
        'raw.hdr': None,
        'image.jpg': None,
        'frameIndex.txt': None,
        'settings.txt': None,
        "_metadata.json": None
    }

    metafile = None
    ds_metafile = None
    last_path = None
    path_match = None
    tempdir = None
    symlinks = []

    for f in resource['local_paths']:
        for fileExt in target_files.keys():
            if f.endswith(fileExt):
                if fileExt != '_metadata.json':
                    filedir = os.path.dirname(f)
                    if not last_path:
                        last_path = filedir
                    else:
                        if filedir != last_path:
                            path_match = False
                        last_path = filedir
                    target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
                else:
                    if f.endswith('/_dataset_metadata.json'):
                        ds_metafile = f
                    elif not f.endswith('/_metadata.json'):
                        metafile = f
                        target_files['_metadata.json'] = {'filename': os.path.basename(metafile), 'path': metafile}

    # Identify md file either with other dataset files, or attached to Clowder dataset
    if metafile is None:
        if ds_metafile is not None:
            # Found dataset metadata, so check for the .json file alongside other files
            logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
            ds_dir = os.path.dirname(target_files['raw']['path'])
            for ds_f in os.listdir(ds_dir):
                if ds_f.endswith("_metadata.json"):
                    target_files['_metadata.json'] = {'filename': ds_f, 'path': os.path.join(ds_dir, ds_f)}
        else:
            raise ValueError('could not locate metadata for %s' % resource['id'])

    # Create symlinks in one directory if inputs aren't in the same one
    if not path_match:
        tempdir = tempfile.mkdtemp()
        for f in target_files.keys():
            currf = target_files[f]
            if currf['filename'] == '_dataset_metadata.json':
                # Open the temporary file and change the JSON content format
                with open(currf['path'], 'r') as mdfile:
                    jsondata = json.load(mdfile)
                md = get_terraref_metadata(jsondata)
                with open(currf['path'], 'w') as mdfile:
                    json.dump(md, mdfile)
                newf = os.path.join(tempdir, target_files['raw']['filename'].replace("_raw", "") + '_metadata.json')
            else:
                newf = os.path.join(tempdir, currf['filename'])
            os.symlink(currf['path'], newf)
            symlinks.append(newf)

    # Adjust sensor path based on VNIR vs SWIR and check for soil mask
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

    # Invoke hyperspectral_workflow.sh
    logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
    # TODO: Move this
    script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
    if soil_mask:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
                                      "-m", soil_mask, "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])
    else:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
                                      "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])

    # Verify outfile exists and upload to Clowder
    logging.getLogger(__name__).info('done creating output file (%s)' % returncode)
    if returncode != 0:
        raise ValueError('script encountered an error')
    if os.path.exists(outFilePath):
        if returncode == 0:
            if outFilePath not in resource['local_paths']:
                target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                                      self.clowderspace,
                                                      self.sensors.get_display_name(sensor=sensor_fullname),
                                                      timestamp[:4], timestamp[:7], timestamp[:10],
                                                      leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname) + ' - ' + timestamp)

                logging.getLogger(__name__).info('uploading %s' % outFilePath)
                upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)

            self.created += 1
            self.bytes += os.path.getsize(outFilePath)
    else:
        logging.getLogger(__name__).error('no output file was produced')

    # Send indices to betyDB
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
    with Dataset(ind_file, "r") as netCDF_handle:
        ndvi = netCDF_handle.get_variables_by_attributes(
            standard_name='normalized_difference_chlorophyll_index_750_705')
        NDVI705 = ndvi[0].getValue().ravel()[0]

    # TODO: Create CSV using ndviVal as primary key
    tmp_csv = 'traits.csv'
    plot_no = 'Full Field'
    csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                 'citation_author,citation_year,citation_title,method'
    csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
               'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                   timestamp, NDVI705, plot_no)
    with open(tmp_csv, 'w') as c:
        c.write(csv_header + '\n' + csv_vals)

    submit_traits(tmp_csv, bety_key=self.bety_key)

    # Remove symlinks and temp directory
    for sym in symlinks:
        os.remove(sym)
    if tempdir:
        os.rmdir(tempdir)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, metadata = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, metadata]:
        self.log_error(resource, "could not locate each of left+right+metadata in processing")
        raise ValueError("could not locate each of left+right+metadata in processing")

    # Determine output location & filenames
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image shapes & gps bounds")
    left_shape = bin2tiff.get_image_shape(metadata, 'left')
    right_shape = bin2tiff.get_image_shape(metadata, 'right')
    left_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['left']['bounding_box'])
    right_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['right']['bounding_box'])
    out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    if (not os.path.isfile(left_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % left_tiff)
        left_image = bin2tiff.process_image(left_shape, img_left, None)
        # Rename output.tif after creation to avoid long path errors
        create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata)
        # TODO: we're moving zero byte files
        shutil.move(out_tmp_tiff, left_tiff)
        if left_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)

    if (not os.path.isfile(right_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % right_tiff)
        right_image = bin2tiff.process_image(right_shape, img_right, None)
        create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, right_tiff)
        if right_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Upload original Lemnatec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    self.log_info(resource, "uploading LemnaTec metadata")
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, "ps2Top")
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['local_paths']:
            if f.endswith(format_ind + '.bin'):
                frames[ind] = f
    if metadata is None or len(frames) < 101:
        self.log_error(resource, 'could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    (img_width, img_height) = self.get_image_dimensions(metadata)
    gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])

    self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

    png_frames = {}
    # skip 0101.bin since it is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        tif_path = png_path.replace(".png", ".tif")
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            self.log_info(resource, "generating and uploading %s" % png_path)
            pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
            create_image(pixels, png_path)
            create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    self.log_info(resource, "generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        # TODO: Coerce histogram and pseudocolor to geotiff?
        self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message(resource)
def notifyClowderOfCompletedTask(task):
    # Verify that globus user has a mapping to clowder credentials in config file
    globUser = task['user']
    userMap = config['clowder']['user_map']

    if globUser in userMap:
        logger.info("%s task complete; notifying Clowder" % task['globus_id'], extra={
            "globus_id": task['globus_id'],
            "action": "NOTIFYING CLOWDER OF COMPLETION"
        })
        clowder_host = config['clowder']['host']
        clowder_key = config['clowder']['secret_key']
        clowder_user = userMap[globUser]['clowder_user']
        clowder_pass = userMap[globUser]['clowder_pass']
        clowder_id = userMap[globUser]['clowder_id']
        clowder_context = userMap[globUser]['context']

        sess = requests.Session()
        sess.auth = (clowder_user, clowder_pass)

        # Response can be OK, RETRY or ERROR
        response = "OK"

        # Prepare upload object with all file(s) found
        updatedTask = safeCopy(task)

        space_id = task['contents']['space_id'] if 'space_id' in task['contents'] else config['clowder']['primary_space']
        for ds in task['contents']:
            # Skip any unexpected files at root level, e.g.
            #   /home/clowder/sites/ua-mac/raw_data/GetFluorescenceValues.m
            #   /home/clowder/sites/ua-mac/raw_data/irrigation/2017-06-04/@Recycle/flowmetertotals_March-2017.csv
            if ds in ["LemnaTec - MovingSensor"] or ds.find("@Recycle") > -1:
                continue

            filesQueued = []
            fileFormData = []
            datasetMD = None
            datasetMDFile = False
            lastFile = None
            lastFileKey = None
            sensorname = ds.split(" - ")[0]

            logger.info("%s -- Processing [%s]" % (task['globus_id'], ds))

            # Assign dataset-level metadata if provided
            if "md" in task['contents'][ds]:
                datasetMD = task['contents'][ds]['md']

            # Add local files to dataset by path
            if 'files' in task['contents'][ds]:
                for fkey in task['contents'][ds]['files']:
                    fobj = task['contents'][ds]['files'][fkey]
                    if 'clowder_id' not in fobj or fobj['clowder_id'] == "":
                        if os.path.exists(fobj['path']):
                            if fobj['name'].find("metadata.json") == -1:
                                if 'md' in fobj:
                                    # Use [1:-1] to avoid json.dumps wrapping quotes
                                    # Replace \" with " to avoid json.dumps escaping quotes
                                    mdstr = ', "md":' + json.dumps(fobj['md'])[1:-1].replace('\\"', '"')
                                else:
                                    mdstr = ""
                                filesQueued.append((fobj['path'], mdstr))
                                lastFile = fobj['name']
                                lastFileKey = fkey
                            else:
                                try:
                                    datasetMD = loadJsonFile(fobj['path'])
                                    datasetMDFile = fkey
                                except:
                                    logger.error("[%s] could not decode JSON from %s" % (ds, fobj['path']))
                                    updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = "FILE NOT FOUND"
                                    updatedTask['contents'][ds]['files'][fkey]['error'] = "Failed to load JSON"
                                    writeTaskToDatabase(updatedTask)
                                    if response == "OK":
                                        response = "ERROR"  # Don't overwrite a RETRY
                        else:
                            logger.error("[%s] file not found: %s" % (ds, fobj['path']))
                            updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = "FILE NOT FOUND"
                            updatedTask['contents'][ds]['files'][fkey]['error'] = "File not found"
                            writeTaskToDatabase(updatedTask)
                            if response == "OK":
                                response = "ERROR"  # Don't overwrite a RETRY

            if len(filesQueued) > 0 or datasetMD:
                # Try to clean metadata first
                if datasetMD:
                    cleaned_dsmd = None
                    try:
                        cleaned_dsmd = clean_metadata(datasetMD, sensorname)
                    except Exception as e:
                        logger.error("[%s] could not clean md: %s" % (ds, str(e)))
                        task['contents'][ds]['error'] = "Could not clean metadata: %s" % str(e)
                        # TODO: possible this could be recoverable with more info from clean_metadata
                        if response == "OK":
                            response = "ERROR"  # Don't overwrite a RETRY

                # e.g. "co2Sensor - 2016-12-25" or "VNIR - 2016-12-25__12-32-42-123"
                if ds.find(" - ") > -1:
                    c_sensor = ds.split(" - ")[0]
                    c_date = ds.split(" - ")[1]
                    c_year = c_date.split('-')[0]
                    c_month = c_date.split('-')[1]
                    if c_date.find("__") == -1:
                        # If we only have a date and not a timestamp, don't create date collection
                        c_date = None
                    else:
                        c_date = c_date.split("__")[0].split("-")[2]
                else:
                    c_sensor, c_date, c_year, c_month = ds, None, None, None

                # Get dataset from clowder, or create & associate with collections
                try:
                    hierarchy_host = clowder_host + ("/" if not clowder_host.endswith("/") else "")
                    dsid = build_dataset_hierarchy(hierarchy_host, clowder_key, clowder_user, clowder_pass,
                                                   space_id, c_sensor, c_year, c_month, c_date, ds)
                    logger.info(" [%s] id: %s" % (ds, dsid))
                except Exception as e:
                    logger.error("[%s] could not build hierarchy: %s" % (ds, str(e)))
                    task['contents'][ds]['retry'] = "Could not build dataset hierarchy: %s" % str(e)
                    response = "RETRY"
                    continue

                if dsid:
                    dsFileList = fetchDatasetFileList(dsid, sess)
                    # Only send files not already present in dataset by path
                    for queued in filesQueued:
                        alreadyStored = False
                        for storedFile in dsFileList:
                            if queued[0] == storedFile['filepath']:
                                logger.info(" skipping file %s (already uploaded)" % queued[0])
                                alreadyStored = True
                                break
                        if not alreadyStored:
                            fileFormData.append(("file", '{"path":"%s"%s}' % (queued[0], queued[1])))

                    if datasetMD and cleaned_dsmd:
                        # Check for existing metadata from the site user
                        alreadyAttached = False
                        md_existing = download_metadata(None, hierarchy_host, clowder_key, dsid)
                        for mdobj in md_existing:
                            if 'agent' in mdobj and 'user_id' in mdobj['agent']:
                                if mdobj['agent']['user_id'] == "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id:
                                    logger.info(" skipping metadata (already attached)")
                                    alreadyAttached = True
                                    break

                        if not alreadyAttached:
                            md = {
                                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                                             {"@vocab": clowder_context}],
                                "content": cleaned_dsmd,
                                "agent": {
                                    "@type": "cat:user",
                                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id
                                }
                            }

                            dsmd = sess.post(clowder_host + "/api/datasets/" + dsid + "/metadata.jsonld",
                                             headers={'Content-Type': 'application/json'},
                                             data=json.dumps(md))

                            if dsmd.status_code in [500, 502, 504]:
                                logger.error("[%s] failed to attach metadata (%s: %s)" % (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][datasetMDFile]['retry'] = "%s: %s" % (dsmd.status_code, dsmd.text)
                                response = "RETRY"
                            elif dsmd.status_code != 200:
                                logger.error("[%s] failed to attach metadata (%s: %s)" % (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][datasetMDFile]['error'] = "%s: %s" % (dsmd.status_code, dsmd.text)
                                response = "ERROR"
                            else:
                                if datasetMDFile:
                                    logger.info(" [%s] added metadata from .json file" % ds, extra={
                                        "dataset_name": ds,
                                        "dataset_id": dsid,
                                        "action": "METADATA ADDED",
                                        "metadata": datasetMD
                                    })
                                    updatedTask['contents'][ds]['files'][datasetMDFile]['metadata_loaded'] = True
                                    updatedTask['contents'][ds]['files'][datasetMDFile]['clowder_id'] = "attached to dataset"
                                    writeTaskToDatabase(updatedTask)
                                else:
                                    # Remove metadata from activeTasks on success even if file upload fails
                                    # in next step, so we don't repeat md
                                    logger.info(" [%s] added metadata" % ds, extra={
                                        "dataset_name": ds,
                                        "dataset_id": dsid,
                                        "action": "METADATA ADDED",
                                        "metadata": datasetMD
                                    })
                                    del updatedTask['contents'][ds]['md']
                                    writeTaskToDatabase(updatedTask)

                    if len(fileFormData) > 0:
                        # Upload collected files for this dataset
                        # Boundary encoding from http://stackoverflow.com/questions/17982741/python-using-reuests-library-for-multipart-form-data
                        logger.info(" [%s] uploading unprocessed files" % ds, extra={
                            "dataset_id": dsid,
                            "dataset_name": ds,
                            "action": "UPLOADING FILES",
                            "filelist": fileFormData
                        })

                        (content, header) = encode_multipart_formdata(fileFormData)
                        fi = sess.post(clowder_host + "/api/uploadToDataset/" + dsid,
                                       headers={'Content-Type': header},
                                       data=content)

                        if fi.status_code in [104, 500, 502, 504]:
                            logger.error("[%s] failed to attach files (%s: %s)" % (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][datasetMDFile]['retry'] = "%s: %s" % (fi.status_code, fi.text)
                            response = "RETRY"
                        elif fi.status_code != 200:
                            logger.error("[%s] failed to attach files (%s: %s)" % (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][datasetMDFile]['error'] = "%s: %s" % (fi.status_code, fi.text)
                            response = "ERROR"
                        else:
                            loaded = fi.json()
                            if 'ids' in loaded:
                                for fobj in loaded['ids']:
                                    logger.info(" [%s] added file %s" % (ds, fobj['name']))
                                    for fkey in updatedTask['contents'][ds]['files']:
                                        if updatedTask['contents'][ds]['files'][fkey]['name'] == fobj['name']:
                                            updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = fobj['id']
                                            # remove any previous retry/error messages
                                            if 'retry' in updatedTask['contents'][ds]['files'][fkey]:
                                                del updatedTask['contents'][ds]['files'][fkey]['retry']
                                            if 'error' in updatedTask['contents'][ds]['files'][fkey]:
                                                del updatedTask['contents'][ds]['files'][fkey]['error']
                                            break
                                    writeTaskToDatabase(updatedTask)
                            else:
                                logger.info(" [%s] added file %s" % (ds, lastFile))
                                updatedTask['contents'][ds]['files'][lastFileKey]['clowder_id'] = loaded['id']
                                # remove any previous retry/error messages
                                if 'retry' in updatedTask['contents'][ds]['files'][lastFileKey]:
                                    del updatedTask['contents'][ds]['files'][lastFileKey]['retry']
                                if 'error' in updatedTask['contents'][ds]['files'][lastFileKey]:
                                    del updatedTask['contents'][ds]['files'][lastFileKey]['error']
                                writeTaskToDatabase(updatedTask)

        return response
    else:
        logger.error("%s task: no credentials for Globus user %s" % (task['globus_id'], globUser))
        return "ERROR"
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    if type(parameters) is str:
        parameters = json.loads(parameters)
    if 'parameters' in parameters:
        parameters = parameters['parameters']
    if type(parameters) is unicode:
        parameters = json.loads(str(parameters))

    # Input path will suggest which sensor we are seeing
    sensor_type = None
    for f in resource['files']:
        filepath = f['filepath']
        for sens in ["rgb_geotiff", "ir_geotiff", "laser3d_heightmap"]:
            if filepath.find(sens) > -1:
                sensor_type = sens.split("_")[0]
                break
        if sensor_type is not None:
            break

    # e.g. dataset_name = "Full Field - 2017-01-01"
    dataset_name = parameters["output_dataset"]
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""
    timestamp = dataset_name.split(" - ")[1]

    out_tif_full = self.sensors.create_sensor_path(timestamp, opts=[sensor_type, scan_name])
    out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
    out_vrt = out_tif_full.replace(".tif", ".vrt")
    out_dir = os.path.dirname(out_vrt)

    if os.path.exists(out_vrt) and not self.overwrite:
        self.log_skip(resource, "%s already exists; ending process" % out_vrt)
        return

    if not self.darker or sensor_type != 'rgb':
        (nu_created, nu_bytes) = self.generateSingleMosaic(connector, host, secret_key, sensor_type,
                                                           out_dir, out_vrt, out_tif_thumb, out_tif_full,
                                                           parameters, resource)
    else:
        (nu_created, nu_bytes) = self.generateDarkerMosaic(connector, host, secret_key, sensor_type,
                                                           out_dir, out_vrt, out_tif_thumb, out_tif_full,
                                                           parameters, resource)
    self.created += nu_created
    self.bytes += nu_bytes

    # Get dataset ID or create it, creating parent collections as needed
    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7],
                                          leaf_ds_name=dataset_name)

    # Upload full field image to Clowder
    content = {
        "comment": "This stitched image is computed based on an assumption that the scene is planar. \
There are likely to be small offsets near the boundary of two images anytime there are plants \
at the boundary (because those plants are higher than the ground plane), or where the dirt is \
slightly higher or lower than average.",
        "file_ids": parameters["file_paths"]
    }

    if os.path.exists(out_tif_thumb):
        thumbid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_tif_thumb)
        thumbmeta = build_metadata(host, self.extractor_info, thumbid, content, 'file')
        upload_metadata(connector, host, secret_key, thumbid, thumbmeta)

    if os.path.exists(out_tif_full):
        fullid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_tif_full)
        fullmeta = build_metadata(host, self.extractor_info, fullid, content, 'file')
        upload_metadata(connector, host, secret_key, fullid, fullmeta)

    self.end_message(resource)
def upload_to_clowder(dir, type, scan):
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    if args.type == "rgb_geotiff":
        print("Submission of RGB GeoTIFF would happen now")
        return
        # NOTE: the early return above currently disables the rest of this branch
        disp = "RGB GeoTIFFs"
        timestamp = dir.split("/")[-2]
        target_dsid = build_dataset_hierarchy(host, secret_key, clow_user, clow_pass, clowspace, disp,
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=disp + ' - ' + timestamp)
        output_ids = {}

        # First, upload actual files
        for targ_file in ["rgb_geotiff_L1_ua-mac_%s_left.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_right.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_nrmac_left.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_nrmac_right.tif" % ts]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Second, upload metadata
        ds_md = os.path.join(dir, "clean_metadata.json")
        if os.path.isfile(ds_md):
            # Dataset metadata
            extractor_info = {
                "extractor_name": "terra.stereo-rgb.bin2tif",
                "extractor_version": "1.1",
                "extractor_author": "Max Burnette <*****@*****.**>",
                "extractor_description": "Stereo RGB Image Bin to GeoTIFF Converter",
                "extractor_repo": "https://github.com/terraref/extractors-stereo-rgb.git"
            }
            with open(ds_md, 'r') as contents:
                jmd = json.load(contents)
            upload_dataset_metadata(conn, host, secret_key, clowder_id, jmd)
            lemna_md = build_metadata(host, extractor_info, target_dsid, jmd, 'dataset')
            upload_metadata(conn, host, secret_key, target_dsid, lemna_md)

        nrmac_md = os.path.join(dir, "nrmac_scores.json")
        if os.path.isfile(nrmac_md):
            # NRMAC file metadata
            extractor_info = {
                "extractor_name": "terra.stereo-rgb.nrmac",
                "extractor_version": "1.0",
                "extractor_author": "Sidike Paheding <*****@*****.**>",
                "extractor_description": "Stereo RGB No-Reference Multiscale Autocorrelation",
                "extractor_repo": "https://github.com/terraref/quality-metrics.git"
            }
            with open(nrmac_md, 'r') as contents:
                jmd = json.load(contents)
            fi_id = output_ids["rgb_geotiff_L1_ua-mac_%s_nrmac_left.tif" % ts]
            ext_meta = build_metadata(host, extractor_info, fi_id,
                                      {"quality_score": jmd["quality_score"]["left"]}, 'file')
            upload_metadata(conn, host, secret_key, fi_id, ext_meta)

            fi_id = output_ids["rgb_geotiff_L1_ua-mac_%s_nrmac_right.tif" % ts]
            ext_meta = build_metadata(host, extractor_info, fi_id,
                                      {"quality_score": jmd["quality_score"]["right"]}, 'file')
            upload_metadata(conn, host, secret_key, fi_id, ext_meta)

        # Write output_ids.json
        with open(os.path.join(dir, "clowder_ids.json"), 'w') as js:
            js.write(json.dumps(output_ids))

    elif args.type == "fullfield":
        print("Submission of Full Field Mosaic would happen now")
        return
        # NOTE: the early return above currently disables the rest of this branch
        disp = "Full Field Stitched Mosaics"
        timestamp = dir.split("/")[-2]
        target_dsid = build_dataset_hierarchy(host, secret_key, clow_user, clow_pass, clowspace, disp,
                                              timestamp[:4], timestamp[5:7],
                                              leaf_ds_name=disp + ' - ' + timestamp)
        # TODO: Can each scan be in a separate folder in Clowder?
        output_ids = {}

        # First, upload NRMAC files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s_nrmac.vrt" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_nrmac.tif" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Second, upload main stitched files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s.vrt" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_thumb.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_10pct.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s.png" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Third, upload trait CSV files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s_canopycover_bety.csv" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_canopycover_geo.csv" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Write output_ids.json
        with open(os.path.join(dir, "clowder_ids.json"), 'w') as js:
            js.write(json.dumps(output_ids))
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_extractor_metadata(all_dsmd)
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['files']:
            if f['filename'].endswith(format_ind + '.bin'):
                frames[ind] = f['filename']
    if metadata is None or len(frames) < 101:
        logging.error('could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    img_width = 1936
    img_height = 1216

    png_frames = {}
    # skip 0101.bin since it is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            logging.info("...generating and uploading %s" % png_path)
            pixels = numpy.fromfile(frames[ind], numpy.dtype('uint8')).reshape([img_height, img_width])
            create_image(pixels, png_path)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    logging.info("...generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        psiiCore.psii_analysis(png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path)
        self.bytes += os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get BIN file and metadata
    bin_file, metadata = None, None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        # Otherwise, check for the raw IR .bin file
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, metadata]:
        logging.getLogger(__name__).error('could not find both ir.bin and metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    png_path = self.sensors.create_sensor_path(timestamp, ext='png')
    tiff_path = self.sensors.create_sensor_path(timestamp)
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    skipped_png = False
    if not os.path.exists(png_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating %s" % png_path)
        # get raw data from bin file
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        if png_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True

    if not os.path.exists(tiff_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating temperature matrix")
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, metadata)  # get temperature

        logging.getLogger(__name__).info("Creating %s" % tiff_path)
        # Rename temporary tif after creation to avoid long path errors
        out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))
        create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True, self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, tiff_path)
        if tiff_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    # Upload original Lemnatec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message()