def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    with open(resource['local_paths'][0], 'r') as inputcsv:
        inputlines = inputcsv.readlines()

    # first check if there is data besides the header line
    if len(inputlines) <= 1:
        self.log_info(resource, "no trait lines found in CSV; skipping upload")
    else:
        # submit CSV to BETY
        self.log_info(resource, "found %s trait lines; submitting CSV to bety" % str(len(inputlines) - 1))
        submit_traits(resource['local_paths'][0], betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata (%s)" % resource['id'])
        ext_meta = build_metadata(host, self.extractor_info, resource['id'], {}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def insert_height_traits_into_betydb(in_dir, out_dir, str_date, param_percentile, sensor_d, convt):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    hist_e, hist_w = analysis_utils.load_histogram_from_both_npy(in_dir)

    out_file = os.path.join(out_dir, str_date + '_height.csv')
    (fields, traits) = analysis_utils.get_traits_table_height()

    with open(out_file, 'w') as csv_file:
        csv_file.write(','.join(map(str, fields)) + '\n')

        for j in range(0, PLOT_COL_NUM * PLOT_RANGE_NUM):
            targetHist_e = hist_e[j, :]
            targetHist_w = hist_w[j, :]
            plotNum = j + 1

            # Skip plots whose east or west histogram is empty
            if targetHist_e.max() == 0 or targetHist_w.max() == 0:
                continue

            # East histogram: first bin whose cumulative mass exceeds the percentile
            targetHist_e = targetHist_e / np.sum(targetHist_e)
            quantiles_e = np.cumsum(targetHist_e)
            quantile_e = np.arange(len(quantiles_e))[quantiles_e > param_percentile].min()

            # West histogram: same percentile-of-histogram calculation
            targetHist_w = targetHist_w / np.sum(targetHist_w)
            quantiles_w = np.cumsum(targetHist_w)
            quantile_w = np.arange(len(quantiles_w))[quantiles_w > param_percentile].min()

            estHeight = (quantile_e + quantile_w) / 2

            traits['local_datetime'] = str_date + 'T12:00:00'
            traits['canopy_height'] = str((B_F_SLOPE * float(estHeight) + B_F_OFFSET) / 100.0)
            traits['site'] = analysis_utils.parse_site_from_plotNum_1728(plotNum, convt)
            trait_list = analysis_utils.generate_traits_list_height(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

    #submitToBety(out_file)
    betydb.submit_traits(out_file, filetype='csv', betykey=betydb.get_bety_key(),
                         betyurl=betydb.get_bety_url())
    return
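# Hedged illustration (not part of the pipeline above): the percentile-of-histogram
# step used for canopy height, isolated into a helper. The function name and the
# default percentile are assumptions for this sketch; the bin index it returns would
# still need the B_F_SLOPE / B_F_OFFSET conversion applied, as in the code above.
import numpy as np

def height_bin_at_percentile(hist, percentile=0.95):
    """Return the first histogram bin whose cumulative mass exceeds `percentile`."""
    mass = hist / np.sum(hist)        # normalize counts to a probability mass
    cumulative = np.cumsum(mass)      # running total across height bins
    bins = np.arange(len(cumulative))
    return int(bins[cumulative > percentile].min())

# e.g., averaging the east- and west-facing estimates for plot j, as above:
# est_height = (height_bin_at_percentile(hist_e[j, :], param_percentile) +
#               height_bin_at_percentile(hist_w[j, :], param_percentile)) / 2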
def upload_to_bety(file, clowder_id):
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    submit_traits(file, betykey=bety_key)

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.betydb",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "BETYdb CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(
        host, extractor_info, clowder_id,
        {"betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/v1/variables?name=canopy_cover"},
        'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Find input files in dataset
    target_files = {
        'raw': None,
        'raw.hdr': None,
        'image.jpg': None,
        'frameIndex.txt': None,
        'settings.txt': None,
        "_metadata.json": None
    }
    metafile = None
    ds_metafile = None
    last_path = None
    path_match = None
    tempdir = None
    symlinks = []

    for f in resource['local_paths']:
        for fileExt in target_files.keys():
            if f.endswith(fileExt):
                if fileExt != '_metadata.json':
                    filedir = os.path.dirname(f)
                    if not last_path:
                        last_path = filedir
                    else:
                        if filedir != last_path:
                            path_match = False
                        last_path = filedir
                    target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
                else:
                    if f.endswith('/_dataset_metadata.json'):
                        ds_metafile = f
                    elif not f.endswith('/_metadata.json'):
                        metafile = f
                        target_files['_metadata.json'] = {'filename': os.path.basename(metafile),
                                                          'path': metafile}

    # Identify md file either with other dataset files, or attached to Clowder dataset
    if metafile is None:
        if ds_metafile is not None:
            # Found dataset metadata, so check for the .json file alongside other files
            logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
            ds_dir = os.path.dirname(target_files['raw']['path'])
            for ds_f in os.listdir(ds_dir):
                if ds_f.endswith("_metadata.json"):
                    target_files['_metadata.json'] = {'filename': ds_f,
                                                      'path': os.path.join(ds_dir, ds_f)}
        else:
            raise ValueError('could not locate metadata for %s' % resource['id'])

    # Create symlinks in one directory if inputs aren't in the same one
    if not path_match:
        tempdir = tempfile.mkdtemp()
        for f in target_files.keys():
            currf = target_files[f]
            if currf['filename'] == '_dataset_metadata.json':
                # Rewrite the dataset metadata file so it contains the cleaned TERRA-REF metadata
                with open(currf['path'], 'r') as mdfile:
                    jsondata = json.load(mdfile)
                md = get_terraref_metadata(jsondata)
                with open(currf['path'], 'w') as mdfile:
                    json.dump(md, mdfile)
                newf = os.path.join(tempdir,
                                    target_files['raw']['filename'].replace("_raw", "") + '_metadata.json')
            else:
                newf = os.path.join(tempdir, currf['filename'])
            os.symlink(currf['path'], newf)
            symlinks.append(newf)

    # Adjust sensor path based on VNIR vs SWIR and check for soil mask
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

    # Invoke hyperspectral_workflow.sh
    logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
    # TODO: Move this
    script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
    if soil_mask:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
                                      "-m", soil_mask, "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])
    else:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
                                      "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])

    # Verify outfile exists and upload to clowder
    logging.getLogger(__name__).info('done creating output file (%s)' % (returncode))
    if returncode != 0:
        raise ValueError('script encountered an error')
    if os.path.exists(outFilePath):
        if returncode == 0:
            if outFilePath not in resource['local_paths']:
                target_dsid = build_dataset_hierarchy(
                    host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                    self.sensors.get_display_name(sensor=sensor_fullname),
                    timestamp[:4], timestamp[:7], timestamp[:10],
                    leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname) + ' - ' + timestamp)
                logging.getLogger(__name__).info('uploading %s' % outFilePath)
                upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)
                self.created += 1
                self.bytes += os.path.getsize(outFilePath)
    else:
        logging.getLogger(__name__).error('no output file was produced')

    # Send indices to betyDB
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
    with Dataset(ind_file, "r") as netCDF_handle:
        ndvi = netCDF_handle.get_variables_by_attributes(
            standard_name='normalized_difference_chlorophyll_index_750_705')
        NDVI705 = ndvi[0].getValue().ravel()[0]

    # TODO: Create CSV using ndviVal as primary key
    tmp_csv = 'traits.csv'
    plot_no = 'Full Field'
    csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                 'citation_author,citation_year,citation_title,method'
    csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
               'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                   timestamp, NDVI705, plot_no)
    with open(tmp_csv, 'w') as c:
        c.write(csv_header + '\n' + csv_vals)
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Remove symlinks and temp directory
    for sym in symlinks:
        os.remove(sym)
    if tempdir:
        os.rmdir(tempdir)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    tmp_csv = "canopycovertraits.csv"
    csv_file = open(tmp_csv, 'w')
    (fields, traits) = ccCore.get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        try:
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
            if len(pxarray.shape) < 3:
                logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                continue
            ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5)
            ccVal *= 100.0  # Make 0-100 instead of 0-1
            successful_plots += 1
            if successful_plots % 10 == 0:
                logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots)))
        except Exception:
            logging.error("error generating cc for %s" % plotname)
            continue

        traits['canopy_cover'] = str(ccVal)
        traits['site'] = plotname
        traits['local_datetime'] = timestamp + "T12:00:00"
        trait_list = ccCore.generate_traits_list(traits)
        csv_file.write(','.join(map(str, trait_list)) + '\n')

        # Prepare and submit datapoint
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
        time_fmt = timestamp + "T12:00:00-07:00"
        dpmetadata = {
            "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
            "canopy_cover": ccVal
        }
        create_datapoint_with_dependencies(
            connector, host, secret_key, "Canopy Cover",
            (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
            dpmetadata, timestamp)

    # submit CSV to BETY
    csv_file.close()
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(
        host, self.extractor_info, resource['parent']['id'],
        {
            "plots_processed": successful_plots,
            "plots_skipped": len(all_plots) - successful_plots,
            "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover"
        }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    tmp_csv = "meantemptraits.csv"
    csv_file = open(tmp_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    dsmd = download_metadata(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
        #tc = getFlir.rawData_to_temperature(pxarray, terramd)  # get temperature

        # Filter out any negative (invalid) pixels before averaging
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            #generate_csv(tmp_csv, fields, trait_list)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                "surface_temperature": str(mean_tc)
            }
            create_datapoint_with_dependencies(
                connector, host, secret_key, "IR Surface Temperature",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

        successful_plots += 1

    # submit CSV to BETY
    csv_file.close()
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain metadata
    metadata = build_metadata(
        host, self.extractor_info, resource['parent']['id'],
        {
            "plots_processed": successful_plots,
            "plots_skipped": len(all_plots) - successful_plots,
            "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
        }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    self.end_message()
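# Hedged sketch of the BETY-ready CSV that the plot-level extractors above build before
# calling submit_traits(). The column list mirrors the csv_header used in the hyperspectral
# extractor with the trait column swapped for surface_temperature; the exact fields returned
# by get_traits_table() may differ, and the row values here are placeholders.
import csv

example_fields = ['local_datetime', 'surface_temperature', 'access_level', 'species', 'site',
                  'citation_author', 'citation_year', 'citation_title', 'method']
example_row = ['2017-06-15T12:00:00', '31.7', '2', 'Sorghum bicolor', 'Example plot name',
               'Example, Author', '2017', 'Example citation title', 'Mean IR surface temperature']

with open('example_traits.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(example_fields)
    writer.writerow(example_row)
# The resulting file would then be submitted with submit_traits('example_traits.csv', betykey=...).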
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    all_plots = get_site_boundaries(timestamp, city='Maricopa')

    successful_plots = 0
    for plotname in all_plots:
        bounds = all_plots[plotname]

        # Use GeoJSON string to clip full field to this plot
        try:
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0], bounds)
            if len(pxarray.shape) < 3:
                logging.error("unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                continue
            plot_img = create_image(pxarray, "plot_image.png")
            plot_csv = "plot.csv"
            self.generate_table_only(plot_img, plot_csv)
            trait_vals = self.extract_vals_from_csv(plot_csv)
            successful_plots += 1
            if successful_plots % 10 == 0:
                logging.info("processed %s/%s plots successfully" % (successful_plots, len(all_plots)))
        except Exception:
            logging.error("error generating traits for %s" % plotname)
            continue

        # Create BETY-ready CSV
        (fields, traits) = self.get_traits_table()
        for tr in trait_vals:
            traits[tr] = str(trait_vals[tr])
        traits['site'] = plotname
        traits['local_datetime'] = timestamp + "T12:00:00"
        trait_list = self.generate_traits_list(traits)
        self.generate_cc_csv(plot_csv, fields, trait_list)

        # submit CSV to BETY
        submit_traits(plot_csv, betykey=self.bety_key)

        # Prepare and submit datapoint
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]
        time_fmt = timestamp + "T12:00:00-07:00"
        dpmetadata = {
            "source": host + "files/" + resource['id'],
        }
        for tr in trait_vals:
            dpmetadata[tr] = str(trait_vals[tr])
        create_datapoint_with_dependencies(
            connector, host, secret_key, "Canopy Cover",
            (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
            dpmetadata, timestamp)

        os.remove(plot_img)
        os.remove(plot_csv)

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(
        host, self.extractor_info, resource['parent']['id'],
        {
            "plots_processed": successful_plots,
            "plots_skipped": len(all_plots) - successful_plots
            # TODO: add link to BETY trait IDs
        }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    (fields, traits) = pcia.get_traits_table()

    # get imgs paths, filter out the json paths
    img_paths = []
    for p in resource['local_paths']:
        if p[-4:] == '.jpg' or p[-4:] == '.png':
            img_paths.append(p)
        # Get metadata for avg_traits from file metadata
        elif p.endswith("_metadata.json") and not p.endswith("_dataset_metadata.json"):
            with open(p) as ds_md:
                md_set = json.load(ds_md)
            for md in md_set:
                if 'content' in md:
                    needed_fields = ['plant_barcode', 'genotype', 'treatment', 'imagedate']
                    for fld in needed_fields:
                        if traits[fld] == '' and fld in md['content']:
                            if fld == 'imagedate':
                                traits[fld] = md['content'][fld].replace(" ", "T")
                            else:
                                traits[fld] = md['content'][fld]

    # build list of file descriptor dictionaries with sensor info
    file_objs = []
    for f in resource['files']:
        found_info = False
        image_id = f['id']

        # Get from file metadata if possible
        file_md = download_file_metadata(connector, host, secret_key, f['id'])
        for md in file_md:
            if 'content' in md:
                mdc = md['content']
                if ('rotation_angle' in mdc) and ('perspective' in mdc) and ('camera_type' in mdc):
                    if 'experiment_id' in mdc:
                        found_info = True
                        # experiment ID determines what PlantCV code gets executed
                        experiment = mdc['experiment_id']
                        # perspective = 'side-view' / 'top-view'
                        perspective = mdc['perspective']
                        # angle = -1, 0, 90, 180, 270; set top-view angle to be -1 for later sorting
                        angle = mdc['rotation_angle'] if perspective != 'top-view' else -1
                        # camera_type = 'visible/RGB' / 'near-infrared'
                        camera_type = mdc['camera_type']

                        for pth in img_paths:
                            if re.findall(f['filename'], pth):
                                file_objs.append({
                                    'perspective': perspective,
                                    'angle': angle,
                                    'camera_type': camera_type,
                                    'image_path': pth,
                                    'image_id': image_id,
                                    'experiment_id': experiment,
                                    'filename': f['filename']
                                })

        if not found_info:
            # Get from filename if no metadata is found
            raw_name = re.findall(r"(VIS|NIR|vis|nir)_(SV|TV|sv|tv)(_\d+)*", f["filename"])
            if raw_name:
                raw_int = re.findall(r'\d+', raw_name[0][2])
                angle = -1 if raw_int == [] else int(raw_int[0])  # -1 for top-view, else angle
                camera_type = raw_name[0][0]
                perspective = raw_name[0][1].lower()

                for pth in img_paths:
                    if re.findall(f['filename'], pth):
                        file_objs.append({
                            'perspective': 'side-view' if perspective == 'sv' else 'top-view',
                            'angle': angle,
                            'camera_type': 'visible/RGB' if camera_type == 'vis' else 'near-infrared',
                            'image_path': pth,
                            'image_id': image_id,
                            'experiment_id': 'unknown',
                            'filename': f['filename']
                        })

    # sort file objs by angle
    file_objs = sorted(file_objs, key=lambda k: k['angle'])

    # process images by matching angles with plantcv
    for i in [x for x in range(len(file_objs)) if x % 2 == 0]:
        if file_objs[i]['camera_type'] == 'visible/RGB':
            vis_src = file_objs[i]['image_path']
            nir_src = file_objs[i + 1]['image_path']
            vis_id = file_objs[i]['image_id']
            nir_id = file_objs[i + 1]['image_id']
            experiment_id = file_objs[i]['experiment_id']
            vis_filename = file_objs[i]['filename']
            nir_filename = file_objs[i + 1]['filename']
        else:
            vis_src = file_objs[i + 1]['image_path']
            nir_src = file_objs[i]['image_path']
            vis_id = file_objs[i + 1]['image_id']
            nir_id = file_objs[i]['image_id']
            experiment_id = file_objs[i + 1]['experiment_id']
            vis_filename = file_objs[i + 1]['filename']
            nir_filename = file_objs[i]['filename']
        logging.info('...processing: %s + %s' % (os.path.basename(vis_src), os.path.basename(nir_src)))

        # Read VIS image
        img, path, filename = pcv.readimage(vis_src)
        brass_mask = cv2.imread('masks/mask_brass_tv_z1_L1.png')
        # Read NIR image
        nir, path1, filename1 = pcv.readimage(nir_src)
        nir2 = cv2.imread(nir_src, -1)

        try:
            vis_out = os.path.join(self.output_dir, resource['dataset_info']['name'], vis_filename)
            nir_out = os.path.join(self.output_dir, resource['dataset_info']['name'], nir_filename)
            if i == 0:
                vn_traits = pcia.process_tv_images_core(vis_id, img, nir_id, nir, nir2, brass_mask,
                                                        traits, experiment_id, vis_out, nir_out)
            else:
                vn_traits = pcia.process_sv_images_core(vis_id, img, nir_id, nir, nir2,
                                                        traits, experiment_id, vis_out, nir_out)
            logging.getLogger(__name__).info("...uploading resulting metadata")
            # upload the individual file metadata
            metadata = build_metadata(host, self.extractor_info, vis_id, vn_traits[0], 'file')
            upload_file_metadata(connector, host, secret_key, vis_id, metadata)
            metadata = build_metadata(host, self.extractor_info, nir_id, vn_traits[1], 'file')
            upload_file_metadata(connector, host, secret_key, nir_id, metadata)

            # Add PlantCV analysis images to dataset
            for image in vn_traits[2]:
                pyclowder.files.upload_to_dataset(connector, host, secret_key, resource['id'], image)
        except Exception as e:
            logging.getLogger(__name__).error("...error generating vn_traits data; no metadata uploaded")
            logging.getLogger(__name__).error(e)

    # compose the summary traits
    trait_list = pcia.generate_traits_list(traits)

    # generate output CSV & send to Clowder + BETY
    tmp_csv = 'avg_traits.csv'
    pcia.generate_average_csv(tmp_csv, fields, trait_list)
    submit_traits(tmp_csv, betykey=self.bety_key)

    # Flag dataset as processed by extractor
    metadata = build_metadata(host, self.extractor_info, resource['id'], {"status": "COMPLETED"}, 'dataset')
    upload_ds_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
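# Hedged illustration of the pairing assumption in the PlantCV loop above: after sorting
# by angle (top-view entries are assigned angle -1), images are consumed two at a time and
# each consecutive pair is expected to hold one VIS and one NIR capture of the same view.
# This standalone check is a sketch, not part of the extractor.
def check_vis_nir_pairs(file_objs):
    file_objs = sorted(file_objs, key=lambda k: k['angle'])
    assert len(file_objs) % 2 == 0, "expected an even number of images (VIS/NIR pairs)"
    for i in range(0, len(file_objs), 2):
        pair_types = {file_objs[i]['camera_type'], file_objs[i + 1]['camera_type']}
        assert pair_types == {'visible/RGB', 'near-infrared'}, \
            "each consecutive pair should contain one VIS and one NIR image"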