Example #1
    def clip_raster_intersection(file_path: str, file_bounds: str,
                                 plot_bounds: str,
                                 out_file: str) -> Optional[int]:
        """Clips the raster to the intersection of the file bounds and plot bounds
        Arguments:
            file_path: the path to the source file
            file_bounds: the geometric boundary of the source file as JSON
            plot_bounds: the geometric boundary of the plot to clip to as JSON
            out_file: the path to store the clipped image
        Return:
            The number of pixels in the new image, or None if no pixels were saved
        Notes:
            Assumes the boundaries are in the same coordinate system
        Exceptions:
            Raises RuntimeError if the polygons are invalid
        """
        logging.debug(
            "Clip to intersect of plot boundary: File: '%s' '%s' Plot: '%s'",
            file_path, str(file_bounds), str(plot_bounds))
        try:
            file_poly = ogr.CreateGeometryFromJson(str(file_bounds))
            plot_poly = ogr.CreateGeometryFromJson(str(plot_bounds))

            if not file_poly or not plot_poly:
                logging.error(
                    "Invalid polygon specified for clip_raster_intersection: File: '%s' plot: '%s'",
                    str(file_bounds), str(plot_bounds))
                raise RuntimeError(
                    "One or more invalid polygons specified when clipping raster"
                )

            intersection = file_poly.Intersection(plot_poly)
            if not intersection or not intersection.Area():
                logging.info("File does not intersect plot boundary: %s",
                             file_path)
                return None

            # Make sure we pass a multipolygon down to the tuple converter
            if intersection.GetGeometryName().startswith('MULTI'):
                multi_polygon = intersection
            else:
                multi_polygon = ogr.Geometry(ogr.wkbMultiPolygon)
                multi_polygon.AddGeometry(intersection)

            # Proceed to clip to the intersection
            tuples = __internal__.geojson_to_tuples(
                geometry_to_geojson(multi_polygon))
            return clip_raster(file_path,
                               tuples,
                               out_path=out_file,
                               compress=True)

        except Exception as ex:
            logging.exception(
                "Exception caught while clipping image to plot intersection")
            raise
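
A minimal usage sketch for the function above; the raster path, output path, and GeoJSON boundaries are hypothetical placeholders, and both boundaries are assumed to share one coordinate system as the docstring requires.

import json
import logging

file_bounds = json.dumps({"type": "Polygon", "coordinates": [
    [[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0], [0.0, 0.0]]]})
plot_bounds = json.dumps({"type": "Polygon", "coordinates": [
    [[5.0, 5.0], [15.0, 5.0], [15.0, 15.0], [5.0, 15.0], [5.0, 5.0]]]})

try:
    # Hypothetical paths; clip_raster_intersection is assumed to be callable directly here
    pixel_count = clip_raster_intersection("orthomosaic.tif", file_bounds,
                                           plot_bounds, "orthomosaic_plot.tif")
    if pixel_count is None:
        logging.info("Source image does not overlap the plot boundary")
except RuntimeError as ex:
    logging.error("Clipping failed: %s", ex)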
Example #2
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        spatial_meta = None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                spatial_meta = terra_md_full.get('spatial_metadata')
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                sensor_name = "ir_geotiff"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                filename = os.path.basename(f)
                if f.endswith("_left.tif"): side = "left"
                else:                       side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        # If the file is a GeoTIFF, simply clip it and upload it to Clowder
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                    elif filename.endswith(".las"):
                        # If the file is LAS, we can merge with any existing scan+plot output safely
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this file has already been merged into the output
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
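
A standalone sketch of the merge-manifest bookkeeping used in the LAS branch above, assuming the same plain-text contents file; the helper names are hypothetical.

import os

def already_merged(merged_txt, file_path):
    """Returns True if file_path is already listed in the merge manifest."""
    if not os.path.exists(merged_txt):
        return False
    with open(merged_txt, 'r') as contents:
        return any(entry.strip() == file_path for entry in contents)

def record_merged(merged_txt, file_path):
    """Appends file_path to the merge manifest so it is not merged twice."""
    with open(merged_txt, 'a') as contents:
        contents.write(file_path + "\n")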
Example #3
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Write the CSV to the same directory as the source file
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        time_fmt = timestamp + "T12:00:00-07:00"
        rootdir = self.sensors.create_sensor_path(timestamp,
                                                  sensor="rgb_fullfield",
                                                  ext=".csv")
        out_csv = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_canopycover_bety.csv"))
        out_geo = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_canopycover_geo.csv"))

        # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

        self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
        csv_file = open(out_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
        geo_file = open(out_geo, 'w')
        geo_file.write(','.join([
            'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
            'timestamp'
        ]) + '\n')

        # Get full list of experiment plots using date as filter
        all_plots = get_site_boundaries(timestamp, city='Maricopa')
        self.log_info(resource,
                      "found %s plots on %s" % (len(all_plots), timestamp))
        successful_plots = 0
        for plotname in all_plots:
            if plotname.find("KSU") > -1:
                self.log_info(resource, "skipping %s" % plotname)
                continue

            bounds = all_plots[plotname]
            tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]

            # Use GeoJSON string to clip full field to this plot
            try:
                pxarray = clip_raster(resource['local_paths'][0], tuples)
                if pxarray is not None:
                    if len(pxarray.shape) < 3:
                        self.log_error(
                            resource, "unexpected array shape for %s (%s)" %
                            (plotname, pxarray.shape))
                        continue

                    ccVal = calculate_canopycover_masked(
                        rollaxis(pxarray, 0, 3))

                    if (ccVal > -1):
                        # Prepare and submit datapoint
                        geo_file.write(','.join([
                            plotname, 'Canopy Cover',
                            str(centroid_lonlat[1]),
                            str(centroid_lonlat[0]), time_fmt, host +
                            ("" if host.endswith("/") else "/") + "files/" +
                            resource['id'],
                            str(ccVal), timestamp
                        ]) + '\n')

                    successful_plots += 1
                    if successful_plots % 10 == 0:
                        self.log_info(
                            resource, "processed %s/%s plots" %
                            (successful_plots, len(all_plots)))
                else:
                    continue
            except Exception:
                self.log_error(resource,
                               "error generating cc for %s" % plotname)
                continue

            if (ccVal > -1):
                traits['canopy_cover'] = str(ccVal)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                csv_file.write(','.join(map(str, trait_list)) + '\n')

        csv_file.close()
        geo_file.close()

        # Upload this CSV to Clowder
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, resource['parent']['id'],
                                   out_csv)
        geoid = upload_to_dataset(connector, host, self.clowder_user,
                                  self.clowder_pass, resource['parent']['id'],
                                  out_geo)

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata")
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": [fileid, geoid]}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        # Trigger separate extractors
        self.log_info(resource, "triggering BETY extractor on %s" % fileid)
        submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
        self.log_info(resource,
                      "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid,
                          "terra.geostreams")

        self.end_message(resource)
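
The Geostreams CSV rows above are assembled inline; this is a small refactoring sketch, not part of the original extractor, that captures the column order of the header written earlier (site, trait, lat, lon, dp_time, source, value, timestamp).

def geostreams_csv_row(site, trait, centroid_lonlat, dp_time, source, value, timestamp):
    """Returns one CSV line in site,trait,lat,lon,dp_time,source,value,timestamp order."""
    return ','.join([site, trait,
                     str(centroid_lonlat[1]),  # latitude
                     str(centroid_lonlat[0]),  # longitude
                     dp_time, source, str(value), timestamp]) + '\n'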
Example #4
def perform_process(transformer: transformer_class.Transformer, check_md: dict, transformer_md: dict, full_md: dict) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: metadata associated with this request
        transformer_md: metadata associated with this transformer
        full_md: the full set of metadata
    Return:
        Returns a dictionary with the results of processing
    """
    # Setup local variables
    timestamp = dateutil.parser.parse(check_md['timestamp'])
    datestamp = timestamp.strftime("%Y-%m-%d")
    localtime = timestamp.strftime("%Y-%m-%dT%H:%M:%S")

    geo_csv_filename = os.path.join(check_md['working_folder'], "canopycover_geostreams.csv")
    bety_csv_filename = os.path.join(check_md['working_folder'], "canopycover.csv")
    geo_file = open(geo_csv_filename, 'w')
    bety_file = open(bety_csv_filename, 'w')

    (fields, traits) = get_traits_table()

    # Setup default trait values
    if transformer.args.germplasmName is not None:
        traits['species'] = transformer.args.germplasmName
    if transformer.args.citationAuthor is not None:
        traits['citation_author'] = transformer.args.citationAuthor
    if transformer.args.citationTitle is not None:
        traits['citation_title'] = transformer.args.citationTitle
    if transformer.args.citationYear is not None:
        traits['citation_year'] = transformer.args.citationYear
    else:
        traits['citation_year'] = timestamp.year

    geo_csv_header = ','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp'])
    bety_csv_header = ','.join(map(str, fields))
    if geo_file:
        geo_file.write(geo_csv_header + "\n")
    if bety_file:
        bety_file.write(bety_csv_header + "\n")

    all_plots = get_site_boundaries(datestamp, city='Maricopa')
    logging.debug("Found %s plots for date %s", str(len(all_plots)), str(datestamp))

    # Loop through finding all image files
    image_exts = SUPPORTED_IMAGE_EXTS
    num_files = 0
    total_plots_calculated = 0
    logging.debug("Looking for images with an extension of: %s", ",".join(image_exts))
    for one_file in check_md['list_files']():
        ext = os.path.splitext(one_file)[1]
        if not ext or ext not in image_exts:
            logging.debug("Skipping non-supported file '%s'", one_file)
            continue

        image_bounds = get_image_bounds(one_file)
        if not image_bounds:
            logging.info("Image file does not appear to be geo-referenced '%s'", one_file)
            continue

        overlap_plots = find_plots_intersect_boundingbox(image_bounds, all_plots, fullmac=True)
        num_plots = len(overlap_plots)

        if not num_plots:
            logging.info("No plots intersect file '%s'", one_file)
            continue

        num_files += 1
        image_spatial_ref = get_spatial_reference_from_json(image_bounds)
        for plot_name in overlap_plots:
            plot_bounds = convert_json_geometry(overlap_plots[plot_name], image_spatial_ref)
            tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))
            centroid = json.loads(centroid_from_geojson(plot_bounds))["coordinates"]

            try:
                logging.debug("Clipping raster to plot")
                pxarray = clip_raster(one_file, tuples, os.path.join(check_md['working_folder'], "temp.tif"))
                if pxarray is not None:
                    if len(pxarray.shape) < 3:
                        logging.warning("Unexpected image dimensions for file '%s'", one_file)
                        logging.warning("    expected 3 and received %s", str(pxarray.shape))
                        break

                    logging.debug("Calculating canopy cover")
                    cc_val = calculate_canopycover_masked(np.rollaxis(pxarray, 0, 3))

                    # Write the datapoint geographically and otherwise
                    logging.debug("Writing to CSV files")
                    if geo_file:
                        csv_data = ','.join([plot_name,
                                             'Canopy Cover',
                                             str(centroid[1]),
                                             str(centroid[0]),
                                             localtime,
                                             one_file,
                                             str(cc_val),
                                             datestamp])
                        geo_file.write(csv_data + "\n")

                    if bety_file:
                        traits['canopy_cover'] = str(cc_val)
                        traits['site'] = plot_name
                        traits['local_datetime'] = localtime
                        trait_list = generate_traits_list(traits)
                        csv_data = ','.join(map(str, trait_list))
                        bety_file.write(csv_data + "\n")

                    total_plots_calculated += 1

                else:
                    continue
            except Exception as ex:
                logging.warning("Exception caught while processing canopy cover: %s", str(ex))
                logging.warning("Error generating canopy cover for '%s'", one_file)
                logging.warning("    plot name: '%s'", plot_name)
                continue

    # Check that we got something
    if not num_files:
        return {'code': -1000, 'error': "No files were processed"}
    if not total_plots_calculated:
        return {'code': -1001, 'error': "No plots intersected with the images provided"}

    # Setup the metadata for returning files
    file_md = []
    if geo_file:
        file_md.append({'path': geo_csv_filename, 'key': 'csv'})
    if bety_file:
        file_md.append({'path': bety_csv_filename, 'key': 'csv'})

    # Perform cleanup
    if geo_file:
        geo_file.close()
        del geo_file
    if bety_file:
        bety_file.close()
        del bety_file

    return {'code': 0, 'files': file_md}
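
A minimal sketch of the axis reordering used above, assuming clip_raster returns pixels in GDAL band-first order (bands, rows, cols); np.rollaxis moves the band axis to the end so calculate_canopycover_masked receives (rows, cols, bands). The array here is synthetic.

import numpy as np

pxarray = np.zeros((3, 480, 640), dtype=np.uint8)  # hypothetical 3-band clip
rgb = np.rollaxis(pxarray, 0, 3)
assert rgb.shape == (480, 640, 3)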
Example #5
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        time_fmt = timestamp+"T12:00:00-07:00"
        out_csv = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", opts=["bety"])
        out_geo = out_csv.replace("_bety", "_geo")

        # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

        self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
        csv_file = open(out_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
        geo_file = open(out_geo, 'w')
        geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

        successful_plots = 0
        nan_plots = 0
        all_plots = get_site_boundaries(timestamp, city='Maricopa')
        for plotname in all_plots:
            if plotname.find("KSU") > -1:
                self.log_info(resource, "skipping %s" % plotname)
                continue

            bounds = all_plots[plotname]
            tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
            centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

            # Use GeoJSON string to clip full field to this plot
            pxarray = clip_raster(resource['local_paths'][0], tuples)

            # Filter out any negative (invalid) pixel values before averaging
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                geo_file.write(','.join([plotname,
                                         'IR Surface Temperature',
                                         str(centroid_lonlat[1]),
                                         str(centroid_lonlat[0]),
                                         time_fmt,
                                         host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                         str(mean_tc),
                                         timestamp]) + '\n')

                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp+"T12:00:00"
                trait_list = generate_traits_list(traits)
                csv_file.write(','.join(map(str, trait_list)) + '\n')
            else:
                nan_plots += 1

            successful_plots += 1

        self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

        # submit CSV to BETY
        csv_file.close()
        geo_file.close()

        # Upload CSVs to Clowder
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['parent']['id'], out_csv)
        geoid  = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['parent']['id'], out_geo)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        self.log_info(resource, "updating file metadata")
        metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
            "total_plots": len(all_plots),
            "plots_processed": successful_plots,
            "blank_plots": nan_plots,
            "files_created": [fileid, geoid],
            "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
        }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

        # Trigger downstream extractors
        self.log_info(resource, "triggering BETY extractor on %s" % fileid)
        submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
        self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

        self.end_message(resource)
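
A minimal sketch of the per-plot temperature computation above with a synthetic array: negative (invalid) pixels become NaN, the remaining values are averaged, and the mean is converted from Kelvin to Celsius.

import numpy

pxarray = numpy.array([[300.0, -1.0], [301.0, 299.0]])  # synthetic Kelvin values
pxarray[pxarray < 0] = numpy.nan
mean_tc = numpy.nanmean(pxarray) - 273.15
if numpy.isnan(mean_tc):
    print("plot contained no valid pixels")
else:
    print("mean surface temperature: %.2f C" % mean_tc)  # 26.85 C for this array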
Example #6
def perform_process(transformer: transformer_class.Transformer, check_md: dict,
                    transformer_md: dict, full_md: list) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: metadata associated with this request
        transformer_md: metadata associated with this transformer
        full_md: the full set of metadata
    Return:
        Returns a dictionary with the results of processing
    """
    # pylint: disable=unused-argument
    # loop through the available files and clip data into plot-level files
    processed_files = 0
    processed_plots = 0
    start_timestamp = datetime.datetime.now()
    file_list = check_md['list_files']()
    files_to_process = __internal__.get_files_to_process(
        file_list, transformer.args.sensor, transformer.args.epsg)
    logging.info("Found %s files to process", str(len(files_to_process)))

    container_md = []
    if files_to_process:
        # Get all the possible plots
        datestamp = check_md['timestamp'][0:10]
        all_plots = get_site_boundaries(datestamp, city='Maricopa')
        logging.debug("Have %s plots for site", len(all_plots))

        for filename in files_to_process:
            processed_files += 1
            file_path = files_to_process[filename]['path']
            file_bounds = files_to_process[filename]['bounds']
            sensor = files_to_process[filename]['sensor_name']
            logging.debug("File bounds: %s", str(file_bounds))

            overlap_plots = find_plots_intersect_boundingbox(file_bounds,
                                                             all_plots,
                                                             fullmac=True)
            logging.info("Have %s plots intersecting file '%s'",
                         str(len(overlap_plots)), filename)

            file_spatial_ref = __internal__.get_spatial_reference_from_json(
                file_bounds)
            for plot_name in overlap_plots:
                processed_plots += 1
                plot_bounds = convert_json_geometry(overlap_plots[plot_name],
                                                    file_spatial_ref)
                logging.debug("Clipping out plot '%s': %s", str(plot_name),
                              str(plot_bounds))
                if __internal__.calculate_overlap_percent(
                        plot_bounds, file_bounds) < 0.10:
                    logging.info("Skipping plot with too small overlap: %s",
                                 plot_name)
                    continue
                tuples = __internal__.geojson_to_tuples(plot_bounds)

                plot_md = __internal__.cleanup_request_md(check_md)
                plot_md['plot_name'] = plot_name

                if filename.endswith('.tif'):
                    # If file is a geoTIFF, simply clip it
                    out_path = os.path.join(check_md['working_folder'],
                                            plot_name)
                    out_file = os.path.join(out_path, filename)
                    if not os.path.exists(out_path):
                        os.makedirs(out_path)

                    if not transformer.args.full_plot_fill:
                        __internal__.clip_raster_intersection(
                            file_path, file_bounds, plot_bounds, out_file)
                    else:
                        logging.info(
                            "Clipping image to plot boundary with fill")
                        clip_raster(file_path,
                                    tuples,
                                    out_path=out_file,
                                    compress=True)

                    cur_md = __internal__.prepare_container_md(
                        plot_name, plot_md, sensor, file_path, [out_file])
                    container_md = __internal__.merge_container_md(
                        container_md, cur_md)

                elif filename.endswith('.las'):
                    out_path = os.path.join(check_md['working_folder'],
                                            plot_name)
                    out_file = os.path.join(out_path, filename)
                    if not os.path.exists(out_path):
                        os.makedirs(out_path)

                    __internal__.clip_las(file_path, tuples, out_path=out_file)

                    cur_md = __internal__.prepare_container_md(
                        plot_name, plot_md, sensor, file_path, [out_file])
                    container_md = __internal__.merge_container_md(
                        container_md, cur_md)

    return {
        'code': 0,
        'container': container_md,
        configuration.TRANSFORMER_NAME: {
            'utc_timestamp': datetime.datetime.utcnow().isoformat(),
            'processing_time': str(datetime.datetime.now() - start_timestamp),
            'total_file_count': len(file_list),
            'processed_file_count': processed_files,
            'total_plots_processed': processed_plots,
            'sensor': transformer.args.sensor
        }
    }
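
The implementation of __internal__.calculate_overlap_percent is not shown above; this is one plausible sketch of it, assuming both boundaries are GeoJSON strings in the same coordinate system and that the returned fraction is the share of the plot covered by the file.

from osgeo import ogr

def calculate_overlap_percent(plot_bounds, file_bounds):
    """Returns intersection area divided by plot area (0.0 on bad or empty geometry)."""
    plot_poly = ogr.CreateGeometryFromJson(str(plot_bounds))
    file_poly = ogr.CreateGeometryFromJson(str(file_bounds))
    if not plot_poly or not file_poly or not plot_poly.Area():
        return 0.0
    intersection = plot_poly.Intersection(file_poly)
    if not intersection:
        return 0.0
    return intersection.Area() / plot_poly.Area()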
Example #7
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context(
        )

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # Make sure if we have the column name of plot-names specified that it exists in
                # the shapefile
                column_names = shape_table.field_names
                if plot_name_idx is not None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            "Shapefile data does not have specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                                (one_name.find('name') >= 0)
                                or (one_name.find('id') >= 0)):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Checking for geographic overlap and skip if there is none
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate system before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and determine overwrite
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if (file_exists(out_file) and not self.overwrite):
                        # The file exists and don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment":
                            "Clipped from shapefile " +
                            os.path.basename(shapefile),
                            "imageName":
                            image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if not sensor_old_base is None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
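
A condensed sketch of the reprojection-before-intersection step used above: when the image boundary and the plot polygon carry different spatial references, the boundary is cloned and transformed into the plot's reference before intersecting. Both geometries are assumed to already have spatial references assigned; the helper name is hypothetical.

from osgeo import osr

def intersect_in_plot_srs(plot_poly, bounds):
    """Intersects an image boundary with a plot polygon in the plot's spatial reference."""
    plot_ref = plot_poly.GetSpatialReference()
    bounds_ref = bounds.GetSpatialReference()
    if plot_ref and bounds_ref and not bounds_ref.IsSame(plot_ref):
        transform = osr.CoordinateTransformation(bounds_ref, plot_ref)
        bounds = bounds.Clone()
        bounds.Transform(transform)
    return plot_poly.Intersection(bounds)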
Example #8
def perform_process(transformer: transformer_class.Transformer, check_md: dict,
                    transformer_md: list, full_md: list) -> dict:
    """Performs the processing of the data
    Arguments:
        transformer: instance of transformer class
        check_md: metadata associated with this request
        transformer_md: metadata associated with this transformer
        full_md: the full set of metadata
    Return:
        Returns a dictionary with the results of processing
    """
    # pylint: disable=unused-argument
    # Disabling pylint checks because resolving them would make code unreadable
    # pylint: disable=too-many-branches, too-many-statements, too-many-locals
    # Setup local variables
    start_timestamp = datetime.datetime.now()
    timestamp = dateutil.parser.parse(check_md['timestamp'])
    datestamp = timestamp.strftime("%Y-%m-%d")
    localtime = timestamp.strftime("%Y-%m-%dT%H:%M:%S")

    geo_csv_filename = os.path.join(check_md['working_folder'],
                                    "meantemp_geostreams.csv")
    bety_csv_filename = os.path.join(check_md['working_folder'],
                                     "meantemp.csv")
    geo_file = open(geo_csv_filename, 'w')
    bety_file = open(bety_csv_filename, 'w')

    (fields, traits) = get_traits_table()

    # Setup default trait values
    if transformer.args.citationAuthor is not None:
        traits['citation_author'] = transformer.args.citationAuthor
    if transformer.args.citationTitle is not None:
        traits['citation_title'] = transformer.args.citationTitle
    if transformer.args.citationYear is not None:
        traits['citation_year'] = transformer.args.citationYear
    else:
        traits['citation_year'] = timestamp.year

    geo_csv_header = ','.join([
        'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
        'timestamp'
    ])
    bety_csv_header = ','.join(map(str, fields))
    if geo_file:
        geo_file.write(geo_csv_header + "\n")
    if bety_file:
        bety_file.write(bety_csv_header + "\n")

    all_plots = get_site_boundaries(datestamp, city='Maricopa')
    logging.debug("Found %s plots for date %s", str(len(all_plots)),
                  str(datestamp))

    # Loop through finding all image files
    image_exts = SUPPORTED_IMAGE_EXTS
    num_files = 0
    number_empty_plots = 0
    total_plots_calculated = 0
    total_files = 0
    processed_plots = 0
    logging.debug("Looking for images with an extension of: %s",
                  ",".join(image_exts))
    for one_file in check_md['list_files']():
        total_files += 1
        ext = os.path.splitext(one_file)[1]
        if not ext or ext not in image_exts:
            logging.debug("Skipping non-supported file '%s'", one_file)
            continue

        image_bounds = get_image_bounds(one_file)
        if not image_bounds:
            logging.info(
                "Image file does not appear to be geo-referenced '%s'",
                one_file)
            continue

        overlap_plots = find_plots_intersect_boundingbox(image_bounds,
                                                         all_plots,
                                                         fullmac=True)
        num_plots = len(overlap_plots)

        if not num_plots:
            logging.info("No plots intersect file '%s'", one_file)
            continue

        num_files += 1
        image_spatial_ref = get_spatial_reference_from_json(image_bounds)
        for plot_name in overlap_plots:
            processed_plots += 1
            plot_bounds = convert_json_geometry(overlap_plots[plot_name],
                                                image_spatial_ref)
            tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))
            centroid = json.loads(
                centroid_from_geojson(plot_bounds))["coordinates"]

            try:
                logging.debug("Clipping raster to plot")
                clip_path = os.path.join(check_md['working_folder'],
                                         "temp.tif")
                pxarray = clip_raster(one_file, tuples, clip_path)
                if os.path.exists(clip_path):
                    os.remove(clip_path)
                if pxarray is not None:
                    logging.debug("Calculating mean temperature")
                    pxarray[pxarray < 0] = np.nan
                    mean_tc = np.nanmean(pxarray) - 273.15

                    # Check for empty plots
                    if np.isnan(mean_tc):
                        number_empty_plots += 1
                        continue

                    # Write the data point geographically and otherwise
                    logging.debug("Writing to CSV files")
                    if geo_file:
                        csv_data = ','.join([
                            plot_name, 'IR Surface Temperature',
                            str(centroid[1]),
                            str(centroid[0]), localtime, one_file,
                            str(mean_tc), datestamp
                        ])
                        geo_file.write(csv_data + "\n")

                    if bety_file:
                        traits['surface_temperature'] = str(mean_tc)
                        traits['site'] = plot_name
                        traits['local_datetime'] = localtime
                        trait_list = generate_traits_list(traits)
                        csv_data = ','.join(map(str, trait_list))
                        bety_file.write(csv_data + "\n")

                    total_plots_calculated += 1

                else:
                    continue
            except Exception as ex:
                logging.warning(
                    "Exception caught while processing mean temperature: %s",
                    str(ex))
                logging.warning("Error generating mean temperature for '%s'",
                                one_file)
                logging.warning("    plot name: '%s'", plot_name)
                continue

    # Check that we got something
    if not num_files:
        return {'code': -1000, 'error': "No files were processed"}
    if not total_plots_calculated:
        return {
            'code': -1001,
            'error': "No plots intersected with the images provided"
        }

    # Setup the metadata for returning files
    file_md = []
    if geo_file:
        file_md.append({'path': geo_csv_filename, 'key': 'csv'})
    if bety_file:
        file_md.append({'path': bety_csv_filename, 'key': 'csv'})

    # Perform cleanup
    if geo_file:
        geo_file.close()
    if bety_file:
        bety_file.close()

    return {
        'code': 0,
        'files': file_md,
        configuration.TRANSFORMER_NAME: {
            'version': configuration.TRANSFORMER_VERSION,
            'utc_timestamp': datetime.datetime.utcnow().isoformat(),
            'processing_time': str(datetime.datetime.now() - start_timestamp),
            'total_file_count': total_files,
            'processed_file_count': num_files,
            'total_plots_processed': processed_plots,
            'empty_plots': number_empty_plots
        }
    }
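
A hedged sketch of how a caller might interpret the dictionary returned above; the transformer and check_md objects are assumed to be prepared elsewhere and are placeholders here.

import logging

result = perform_process(transformer, check_md, [], [])
if result.get('code', -1) != 0:
    logging.error("Processing failed (%s): %s",
                  result.get('code'), result.get('error', 'unknown error'))
else:
    for one_file in result.get('files', []):
        logging.info("Generated %s file: %s", one_file['key'], one_file['path'])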