def clip_raster_intersection(file_path: str, file_bounds: str, plot_bounds: str, out_file: str) -> Optional[int]:
    """Clips the raster to the intersection of the file bounds and plot bounds
    Arguments:
        file_path: the path to the source file
        file_bounds: the geometric boundary of the source file as JSON
        plot_bounds: the geometric boundary of the plot to clip to as JSON
        out_file: the path to store the clipped image
    Return:
        The number of pixels in the new image, or None if no pixels were saved
    Notes:
        Assumes the boundaries are in the same coordinate system
    Exceptions:
        Raises RuntimeError if the polygons are invalid
    """
    logging.debug("Clip to intersect of plot boundary: File: '%s' '%s' Plot: '%s'",
                  file_path, str(file_bounds), str(plot_bounds))
    try:
        file_poly = ogr.CreateGeometryFromJson(str(file_bounds))
        plot_poly = ogr.CreateGeometryFromJson(str(plot_bounds))

        if not file_poly or not plot_poly:
            logging.error("Invalid polygon specified for clip_raster_intersection: File: '%s' plot: '%s'",
                          str(file_bounds), str(plot_bounds))
            raise RuntimeError("One or more invalid polygons specified when clipping raster")

        intersection = file_poly.Intersection(plot_poly)
        if not intersection or not intersection.Area():
            logging.info("File does not intersect plot boundary: %s", file_path)
            return None

        # Make sure we pass a multipolygon down to the tuple converter
        if intersection.GetGeometryName().startswith('MULTI'):
            multi_polygon = intersection
        else:
            multi_polygon = ogr.Geometry(ogr.wkbMultiPolygon)
            multi_polygon.AddGeometry(intersection)

        # Proceed to clip to the intersection
        tuples = __internal__.geojson_to_tuples(geometry_to_geojson(multi_polygon))
        return clip_raster(file_path, tuples, out_path=out_file, compress=True)

    except Exception as ex:
        logging.exception("Exception caught while clipping image to plot intersection")
        raise ex
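# Illustrative usage of clip_raster_intersection() (the raster path and GeoJSON bounds below
# are hypothetical, not taken from the pipeline). Both boundaries must be in the same
# coordinate system, and None is returned when the raster and plot do not overlap:
#
#   file_bounds = '{"type": "Polygon", "coordinates": [[[0, 0], [10, 0], [10, 10], [0, 10], [0, 0]]]}'
#   plot_bounds = '{"type": "Polygon", "coordinates": [[[5, 5], [15, 5], [15, 15], [5, 15], [5, 5]]]}'
#   pixel_count = clip_raster_intersection("scan.tif", file_bounds, plot_bounds, "scan_plot.tif")
#   if pixel_count is None:
#       logging.info("No overlap between the raster and the plot boundary")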
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
            if 'spatial_metadata' in terra_md_full:
                spatial_meta = terra_md_full['spatial_metadata']
            else:
                spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }

        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            if f.endswith("_left.tif"):
                side = "left"
            else:
                side = "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }

        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }

        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
            if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))

            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                              (season_name, experiment_name, plot_display_name,
                               timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))

                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                            self.clowderspace, season_name, experiment_name,
                                                            plot_display_name, timestamp[:4], timestamp[5:7],
                                                            timestamp[8:10], leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    """If file is LAS, we can merge with any existing scan+plot output safely"""
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check if this file has already been merged into the plot-level output
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
        "files_created": uploaded_file_ids
    }, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Write the CSV to the same directory as the source file
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    rootdir = self.sensors.create_sensor_path(timestamp, sensor="rgb_fullfield", ext=".csv")
    out_csv = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_bety.csv"))
    out_geo = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_geo.csv"))

    # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

    # Get full list of experiment plots using date as filter
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    self.log_info(resource, "found %s plots on %s" % (len(all_plots), timestamp))
    successful_plots = 0
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        try:
            pxarray = clip_raster(resource['local_paths'][0], tuples)
            if pxarray is not None:
                if len(pxarray.shape) < 3:
                    self.log_error(resource, "unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                    continue

                ccVal = calculate_canopycover_masked(rollaxis(pxarray, 0, 3))

                if (ccVal > -1):
                    # Prepare and submit datapoint
                    geo_file.write(','.join([plotname, 'Canopy Cover', str(centroid_lonlat[1]), str(centroid_lonlat[0]),
                                             time_fmt, host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                             str(ccVal), timestamp]) + '\n')

                successful_plots += 1
                if successful_plots % 10 == 0:
                    self.log_info(resource, "processed %s/%s plots" % (successful_plots, len(all_plots)))
            else:
                continue
        except:
            self.log_error(resource, "error generating cc for %s" % plotname)
            continue

        if (ccVal > -1):
            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

    csv_file.close()
    geo_file.close()

    # Upload this CSV to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": [fileid, geoid]}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Trigger separate extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: dict) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
    Return:
        Returns a dictionary with the results of processing
    """
    # Setup local variables
    timestamp = dateutil.parser.parse(check_md['timestamp'])
    datestamp = timestamp.strftime("%Y-%m-%d")
    localtime = timestamp.strftime("%Y-%m-%dT%H:%M:%S")

    geo_csv_filename = os.path.join(check_md['working_folder'], "canopycover_geostreams.csv")
    bety_csv_filename = os.path.join(check_md['working_folder'], "canopycover.csv")
    geo_file = open(geo_csv_filename, 'w')
    bety_file = open(bety_csv_filename, 'w')

    (fields, traits) = get_traits_table()

    # Setup default trait values
    if transformer.args.germplasmName is not None:
        traits['species'] = transformer.args.germplasmName
    if transformer.args.citationAuthor is not None:
        traits['citation_author'] = transformer.args.citationAuthor
    if transformer.args.citationTitle is not None:
        traits['citation_title'] = transformer.args.citationTitle
    if transformer.args.citationYear is not None:
        traits['citation_year'] = transformer.args.citationYear
    else:
        traits['citation_year'] = timestamp.year

    geo_csv_header = ','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp'])
    bety_csv_header = ','.join(map(str, fields))
    if geo_file:
        geo_file.write(geo_csv_header + "\n")
    if bety_file:
        bety_file.write(bety_csv_header + "\n")

    all_plots = get_site_boundaries(datestamp, city='Maricopa')
    logging.debug("Found %s plots for date %s", str(len(all_plots)), str(datestamp))

    # Loop through finding all image files
    image_exts = SUPPORTED_IMAGE_EXTS
    num_files = 0
    total_plots_calculated = 0
    logging.debug("Looking for images with an extension of: %s", ",".join(image_exts))
    for one_file in check_md['list_files']():
        ext = os.path.splitext(one_file)[1]
        if not ext or ext not in image_exts:
            logging.debug("Skipping non-supported file '%s'", one_file)
            continue

        image_bounds = get_image_bounds(one_file)
        if not image_bounds:
            logging.info("Image file does not appear to be geo-referenced '%s'", one_file)
            continue

        overlap_plots = find_plots_intersect_boundingbox(image_bounds, all_plots, fullmac=True)
        num_plots = len(overlap_plots)
        if not num_plots:
            logging.info("No plots intersect file '%s'", one_file)
            continue

        num_files += 1
        image_spatial_ref = get_spatial_reference_from_json(image_bounds)
        for plot_name in overlap_plots:
            plot_bounds = convert_json_geometry(overlap_plots[plot_name], image_spatial_ref)
            tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))
            centroid = json.loads(centroid_from_geojson(plot_bounds))["coordinates"]

            try:
                logging.debug("Clipping raster to plot")
                pxarray = clip_raster(one_file, tuples, os.path.join(check_md['working_folder'], "temp.tif"))
                if pxarray is not None:
                    if len(pxarray.shape) < 3:
                        logging.warning("Unexpected image dimensions for file '%s'", one_file)
                        logging.warning("    expected 3 and received %s", str(pxarray.shape))
                        break

                    logging.debug("Calculating canopy cover")
                    cc_val = calculate_canopycover_masked(np.rollaxis(pxarray, 0, 3))

                    # Write the datapoint geographically and otherwise
                    logging.debug("Writing to CSV files")
                    if geo_file:
                        csv_data = ','.join([plot_name, 'Canopy Cover', str(centroid[1]), str(centroid[0]),
                                             localtime, one_file, str(cc_val), datestamp])
                        geo_file.write(csv_data + "\n")

                    if bety_file:
                        traits['canopy_cover'] = str(cc_val)
                        traits['site'] = plot_name
                        traits['local_datetime'] = localtime
                        trait_list = generate_traits_list(traits)
                        csv_data = ','.join(map(str, trait_list))
                        bety_file.write(csv_data + "\n")

                    total_plots_calculated += 1
                else:
                    continue
            except Exception as ex:
                logging.warning("Exception caught while processing canopy cover: %s", str(ex))
                logging.warning("Error generating canopy cover for '%s'", one_file)
                logging.warning("    plot name: '%s'", plot_name)
                continue

    # Check that we got something
    if not num_files:
        return {'code': -1000, 'error': "No files were processed"}
    if not total_plots_calculated:
        return {'code': -1001, 'error': "No plots intersected with the images provided"}

    # Setup the metadata for returning files
    file_md = []
    if geo_file:
        file_md.append({'path': geo_csv_filename, 'key': 'csv'})
    if bety_file:
        file_md.append({'path': bety_csv_filename, 'key': 'csv'})

    # Perform cleanup
    if geo_file:
        geo_file.close()
        del geo_file
    if bety_file:
        bety_file.close()
        del bety_file

    return {'code': 0, 'files': file_md}
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    out_csv = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", opts=["bety"])
    out_geo = out_csv.replace("_bety", "_geo")

    # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

    successful_plots = 0
    nan_plots = 0
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        pxarray = clip_raster(resource['local_paths'][0], tuples)

        # Filter out any negative (no-data) values before converting Kelvin to Celsius
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            geo_file.write(','.join([plotname, 'IR Surface Temperature', str(centroid_lonlat[1]), str(centroid_lonlat[0]),
                                     time_fmt, host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                     str(mean_tc), timestamp]) + '\n')

            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')
        else:
            nan_plots += 1

        successful_plots += 1

    self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

    # submit CSV to BETY
    csv_file.close()
    geo_file.close()

    # Upload CSVs to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    self.log_info(resource, "updating file metadata")
    metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
        "total_plots": len(all_plots),
        "plots_processed": successful_plots,
        "blank_plots": nan_plots,
        "files_created": [fileid, geoid],
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
    }, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    # Trigger downstream extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: list) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: metadata associated with this request
        transformer_md: metadata associated with this transformer
        full_md: the full set of metadata
    Return:
        Returns a dictionary with the results of processing
    """
    # pylint: disable=unused-argument
    # loop through the available files and clip data into plot-level files
    processed_files = 0
    processed_plots = 0
    start_timestamp = datetime.datetime.now()
    file_list = check_md['list_files']()
    files_to_process = __internal__.get_files_to_process(file_list, transformer.args.sensor, transformer.args.epsg)
    logging.info("Found %s files to process", str(len(files_to_process)))

    container_md = []
    if files_to_process:
        # Get all the possible plots
        datestamp = check_md['timestamp'][0:10]
        all_plots = get_site_boundaries(datestamp, city='Maricopa')
        logging.debug("Have %s plots for site", len(all_plots))

        for filename in files_to_process:
            processed_files += 1
            file_path = files_to_process[filename]['path']
            file_bounds = files_to_process[filename]['bounds']
            sensor = files_to_process[filename]['sensor_name']
            logging.debug("File bounds: %s", str(file_bounds))

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)
            logging.info("Have %s plots intersecting file '%s'", str(len(overlap_plots)), filename)

            file_spatial_ref = __internal__.get_spatial_reference_from_json(file_bounds)
            for plot_name in overlap_plots:
                processed_plots += 1
                plot_bounds = convert_json_geometry(overlap_plots[plot_name], file_spatial_ref)
                logging.debug("Clipping out plot '%s': %s", str(plot_name), str(plot_bounds))
                if __internal__.calculate_overlap_percent(plot_bounds, file_bounds) < 0.10:
                    logging.info("Skipping plot with too small overlap: %s", plot_name)
                    continue
                tuples = __internal__.geojson_to_tuples(plot_bounds)

                plot_md = __internal__.cleanup_request_md(check_md)
                plot_md['plot_name'] = plot_name

                if filename.endswith('.tif'):
                    # If file is a geoTIFF, simply clip it
                    out_path = os.path.join(check_md['working_folder'], plot_name)
                    out_file = os.path.join(out_path, filename)
                    if not os.path.exists(out_path):
                        os.makedirs(out_path)

                    if not transformer.args.full_plot_fill:
                        __internal__.clip_raster_intersection(file_path, file_bounds, plot_bounds, out_file)
                    else:
                        logging.info("Clipping image to plot boundary with fill")
                        clip_raster(file_path, tuples, out_path=out_file, compress=True)

                    cur_md = __internal__.prepare_container_md(plot_name, plot_md, sensor, file_path, [out_file])
                    container_md = __internal__.merge_container_md(container_md, cur_md)

                elif filename.endswith('.las'):
                    out_path = os.path.join(check_md['working_folder'], plot_name)
                    out_file = os.path.join(out_path, filename)
                    if not os.path.exists(out_path):
                        os.makedirs(out_path)

                    __internal__.clip_las(file_path, tuples, out_path=out_file)

                    cur_md = __internal__.prepare_container_md(plot_name, plot_md, sensor, file_path, [out_file])
                    container_md = __internal__.merge_container_md(container_md, cur_md)

    return {
        'code': 0,
        'container': container_md,
        configuration.TRANSFORMER_NAME: {
            'utc_timestamp': datetime.datetime.utcnow().isoformat(),
            'processing_time': str(datetime.datetime.now() - start_timestamp),
            'total_file_count': len(file_list),
            'processed_file_count': processed_files,
            'total_plots_processed': processed_plots,
            'sensor': transformer.args.sensor
        }
    }
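# Note on the 10% overlap filter above: __internal__.calculate_overlap_percent() is defined
# elsewhere in this module and is not shown here. A plausible implementation (illustrative
# sketch only, not the verified source) intersects the two GeoJSON geometries with OGR and
# returns the intersection area divided by the plot area:
#
#   def calculate_overlap_percent(check_bounds: str, other_bounds: str) -> float:
#       check_poly = ogr.CreateGeometryFromJson(str(check_bounds))
#       other_poly = ogr.CreateGeometryFromJson(str(other_bounds))
#       if not check_poly or not other_poly or not check_poly.Area():
#           return 0.0
#       intersection = check_poly.Intersection(other_poly)
#       if not intersection:
#           return 0.0
#       return intersection.Area() / check_poly.Area()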
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    self.start_message(resource)
    super(ClipByShape, self).process_message(connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Initialize local variables
    dataset_name = parameters["datasetname"]
    season_name, experiment_name = "Unknown Season", "Unknown Experiment"
    datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

    # Array containing the links to uploaded files
    uploaded_file_ids = []

    # Find the files we're interested in
    # pylint: disable=line-too-long
    (shapefile, shxfile, dbffile, imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                                            resource['triggering_file'])
    # pylint: enable=line-too-long
    if shapefile is None:
        self.log_skip(resource, "No shapefile found")
        return
    if shxfile is None:
        self.log_skip(resource, "No SHX file found")
        return
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                 "request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    try:
        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        # Get timestamps. Also get season and experiment information for Clowder collections
        datestamp = self.find_datestamp(dataset_name)
        timestamp = timestamp_to_terraref(self.find_timestamp(dataset_name))
        (season_name, experiment_name, _) = self.get_season_and_experiment(datestamp, self.sensor_name)

        if self.experiment_metadata:
            if 'extractors' in self.experiment_metadata:
                extractor_json = self.experiment_metadata['extractors']
                if 'shapefile' in extractor_json:
                    if 'plot_column_name' in extractor_json['shapefile']:
                        plot_name_idx = extractor_json['shapefile']['plot_column_name']

        # Check our current local variables
        if dbffile is None:
            self.log_info(resource, "DBF file not found, using default plot naming")
        self.log_info(resource, "Extracting plots using shapefile '" + \
                                os.path.basename(shapefile) + "'")

        # Load the shapes and find the plot name column if we have a DBF file
        shape_in = ogr.Open(shapefile)
        layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
        feature = layer.GetNextFeature()
        layer_ref = layer.GetSpatialRef()

        if dbffile:
            shape_table = DBF(dbffile, lowernames=True, ignore_missing_memofile=True)
            shape_rows = iter(list(shape_table))

            # Make sure if we have the column name of plot-names specified that it exists in
            # the shapefile
            column_names = shape_table.field_names
            if plot_name_idx is not None:
                if not find_all_plot_names(plot_name_idx, column_names):
                    raise ValueError("Shapefile data does not have specified plot name" +
                                     " column '" + plot_name_idx + "'")

            # Lookup a plot name field to use
            if plot_name_idx is None:
                for one_name in column_names:
                    # pylint: disable=line-too-long
                    if one_name == "observationUnitName":
                        plot_name_idx = one_name
                        break
                    elif (one_name.find('plot') >= 0) and ((one_name.find('name') >= 0) or (one_name.find('id') >= 0)):
                        plot_name_idx = one_name
                        break
                    elif one_name == 'id':
                        plot_name_idx = one_name
                        break
                    # pylint: enable=line-too-long
            if plot_name_idx is None:
                raise ValueError("Shapefile data does not have a plot name field '" +
                                 os.path.basename(dbffile) + "'")

        # Setup for the extracted plot images
        plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                            " (By Plot)"

        # Loop through each polygon and extract plot level data
        alternate_plot_id = 0
        while feature:

            # Current geometry to extract
            plot_poly = feature.GetGeometryRef()
            if layer_ref:
                plot_poly.AssignSpatialReference(layer_ref)
            plot_spatial_ref = plot_poly.GetSpatialReference()

            # Determine the plot name to use
            plot_name = None
            alternate_plot_id = alternate_plot_id + 1
            if shape_rows and plot_name_idx:
                try:
                    row = next(shape_rows)
                    plot_name = get_plot_name(plot_name_idx, row)
                except StopIteration:
                    pass
            if not plot_name:
                plot_name = "plot_" + str(alternate_plot_id)

            # Determine output dataset name
            leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
            self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                          (season_name, experiment_name, plot_display_name,
                           datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_dataset))

            # Create the dataset, even if we have no data to put in it, so that the caller knows
            # it was addressed
            target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user,
                                                        self.clowder_pass, self.clowderspace,
                                                        season_name, experiment_name,
                                                        plot_display_name, datestamp[:4],
                                                        datestamp[5:7], datestamp[8:10],
                                                        leaf_ds_name=leaf_dataset)

            # Loop through all the images looking for overlap
            for filename in imagefiles:

                # Get the bounds. We also get the reference systems in case we need to convert
                # between them
                bounds = imagefiles[filename]['bounds']
                bounds_spatial_ref = bounds.GetSpatialReference()

                # Checking for geographic overlap and skip if there is none
                if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                    # We need to convert coordinate system before an intersection
                    transform = osr.CoordinateTransformation(bounds_spatial_ref, plot_spatial_ref)
                    new_bounds = bounds.Clone()
                    if new_bounds:
                        new_bounds.Transform(transform)
                        intersection = plot_poly.Intersection(new_bounds)
                        new_bounds = None
                else:
                    # Same coordinate system. Simple intersection
                    intersection = plot_poly.Intersection(bounds)

                if intersection.GetArea() == 0.0:
                    self.log_info(resource, "Skipping image: " + filename)
                    continue

                # Determine where we're putting the clipped file on disk and determine overwrite
                # pylint: disable=unexpected-keyword-arg
                out_file = self.sensors.create_sensor_path(timestamp,
                                                           filename=os.path.basename(filename),
                                                           plot=plot_name,
                                                           subsensor=self.sensor_name)
                if (file_exists(out_file) and not self.overwrite):
                    # The file exists and we don't want to overwrite it
                    self.logger.warn("Skipping existing output file: %s", out_file)
                    continue

                self.log_info(resource, "Attempting to clip '" + filename +
                              "' to polygon number " + str(alternate_plot_id))

                # Create destination folder on disk if we haven't done that already
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                # Clip the raster
                bounds_tuple = polygon_to_tuples_transform(plot_poly, bounds_spatial_ref)

                clip_pix = clip_raster(filename, bounds_tuple, out_path=out_file)
                if clip_pix is None:
                    self.log_error(resource, "Failed to clip image to plot name " + plot_name)
                    continue

                # Upload the clipped image to the dataset
                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      out_file, remove=self.overwrite)
                if not found_in_dest or self.overwrite:
                    image_name = os.path.basename(filename)
                    content = {
                        "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                        "imageName": image_name
                    }
                    if image_name in image_ids:
                        content['imageID'] = image_ids[image_name]

                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, out_file)
                    uploaded_file_ids.append(fileid)

                    # Generate our metadata
                    meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                    clowder_file.upload_metadata(connector, host, secret_key, fileid, meta)
                else:
                    self.logger.warn("Skipping existing file in dataset: %s", out_file)

                self.created += 1
                self.bytes += os.path.getsize(out_file)

            # Get the next shape to extract
            feature = layer.GetNextFeature()

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        id_len = len(uploaded_file_ids)
        if id_len > 0 or self.created > 0:
            extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                          {"files_created": uploaded_file_ids}, 'dataset')
            self.log_info(resource, "Uploading shapefile plot extractor metadata to Level_2 dataset: " +
                          str(extractor_md))
            clowder_dataset.remove_metadata(connector, host, secret_key, resource['id'],
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        else:
            self.logger.warn("Skipping dataset metadata updating since no files were loaded")

    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if sensor_old_base is not None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: list, full_md: list) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
    Return:
        Returns a dictionary with the results of processing
    """
    # pylint: disable=unused-argument
    # Disabling pylint checks because resolving them would make code unreadable
    # pylint: disable=too-many-branches, too-many-statements, too-many-locals

    # Setup local variables
    start_timestamp = datetime.datetime.now()
    timestamp = dateutil.parser.parse(check_md['timestamp'])
    datestamp = timestamp.strftime("%Y-%m-%d")
    localtime = timestamp.strftime("%Y-%m-%dT%H:%M:%S")

    geo_csv_filename = os.path.join(check_md['working_folder'], "meantemp_geostreams.csv")
    bety_csv_filename = os.path.join(check_md['working_folder'], "meantemp.csv")
    geo_file = open(geo_csv_filename, 'w')
    bety_file = open(bety_csv_filename, 'w')

    (fields, traits) = get_traits_table()

    # Setup default trait values
    if transformer.args.citationAuthor is not None:
        traits['citation_author'] = transformer.args.citationAuthor
    if transformer.args.citationTitle is not None:
        traits['citation_title'] = transformer.args.citationTitle
    if transformer.args.citationYear is not None:
        traits['citation_year'] = transformer.args.citationYear
    else:
        traits['citation_year'] = timestamp.year

    geo_csv_header = ','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp'])
    bety_csv_header = ','.join(map(str, fields))
    if geo_file:
        geo_file.write(geo_csv_header + "\n")
    if bety_file:
        bety_file.write(bety_csv_header + "\n")

    all_plots = get_site_boundaries(datestamp, city='Maricopa')
    logging.debug("Found %s plots for date %s", str(len(all_plots)), str(datestamp))

    # Loop through finding all image files
    image_exts = SUPPORTED_IMAGE_EXTS
    num_files = 0
    number_empty_plots = 0
    total_plots_calculated = 0
    total_files = 0
    processed_plots = 0
    logging.debug("Looking for images with an extension of: %s", ",".join(image_exts))
    for one_file in check_md['list_files']():
        total_files += 1
        ext = os.path.splitext(one_file)[1]
        if not ext or ext not in image_exts:
            logging.debug("Skipping non-supported file '%s'", one_file)
            continue

        image_bounds = get_image_bounds(one_file)
        if not image_bounds:
            logging.info("Image file does not appear to be geo-referenced '%s'", one_file)
            continue

        overlap_plots = find_plots_intersect_boundingbox(image_bounds, all_plots, fullmac=True)
        num_plots = len(overlap_plots)
        if not num_plots:
            logging.info("No plots intersect file '%s'", one_file)
            continue

        num_files += 1
        image_spatial_ref = get_spatial_reference_from_json(image_bounds)
        for plot_name in overlap_plots:
            processed_plots += 1
            plot_bounds = convert_json_geometry(overlap_plots[plot_name], image_spatial_ref)
            tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))
            centroid = json.loads(centroid_from_geojson(plot_bounds))["coordinates"]

            try:
                logging.debug("Clipping raster to plot")
                clip_path = os.path.join(check_md['working_folder'], "temp.tif")
                pxarray = clip_raster(one_file, tuples, clip_path)
                if os.path.exists(clip_path):
                    os.remove(clip_path)

                if pxarray is not None:
                    logging.debug("Calculating mean temperature")
                    pxarray[pxarray < 0] = np.nan
                    mean_tc = np.nanmean(pxarray) - 273.15

                    # Check for empty plots
                    if np.isnan(mean_tc):
                        number_empty_plots += 1
                        continue

                    # Write the data point geographically and otherwise
                    logging.debug("Writing to CSV files")
                    if geo_file:
                        csv_data = ','.join([plot_name, 'IR Surface Temperature', str(centroid[1]), str(centroid[0]),
                                             localtime, one_file, str(mean_tc), datestamp])
                        geo_file.write(csv_data + "\n")

                    if bety_file:
                        traits['surface_temperature'] = str(mean_tc)
                        traits['site'] = plot_name
                        traits['local_datetime'] = localtime
                        trait_list = generate_traits_list(traits)
                        csv_data = ','.join(map(str, trait_list))
                        bety_file.write(csv_data + "\n")

                    total_plots_calculated += 1
                else:
                    continue
            except Exception as ex:
                logging.warning("Exception caught while processing mean temperature: %s", str(ex))
                logging.warning("Error generating mean temperature for '%s'", one_file)
                logging.warning("    plot name: '%s'", plot_name)
                continue

    # Check that we got something
    if not num_files:
        return {'code': -1000, 'error': "No files were processed"}
    if not total_plots_calculated:
        return {'code': -1001, 'error': "No plots intersected with the images provided"}

    # Setup the metadata for returning files
    file_md = []
    if geo_file:
        file_md.append({'path': geo_csv_filename, 'key': 'csv'})
    if bety_file:
        file_md.append({'path': bety_csv_filename, 'key': 'csv'})

    # Perform cleanup
    if geo_file:
        geo_file.close()
    if bety_file:
        bety_file.close()

    return {
        'code': 0,
        'files': file_md,
        configuration.TRANSFORMER_NAME: {
            'version': configuration.TRANSFORMER_VERSION,
            'utc_timestamp': datetime.datetime.utcnow().isoformat(),
            'processing_time': str(datetime.datetime.now() - start_timestamp),
            'total_file_count': total_files,
            'processed_file_count': num_files,
            'total_plots_processed': processed_plots,
            'empty_plots': number_empty_plots
        }
    }