def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        with open(resource['local_paths'][0], 'r') as inputcsv:
            inputlines = inputcsv.readlines()

        if len(inputlines) <= 1:
            # first check if there is data besides header line
            self.log_info(resource,
                          "no trait lines found in CSV; skipping upload")
        else:
            # submit CSV to BETY
            self.log_info(
                resource, "found %s trait lines; submitting CSV to bety" %
                str(len(inputlines) - 1))
            submit_traits(resource['local_paths'][0], betykey=self.bety_key)

            # Add metadata to original dataset indicating this was run
            self.log_info(resource,
                          "updating file metadata (%s)" % resource['id'])
            ext_meta = build_metadata(host, self.extractor_info,
                                      resource['id'], {}, 'file')
            upload_metadata(connector, host, secret_key, resource['id'],
                            ext_meta)

        self.end_message(resource)
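
The same check-then-submit flow can be exercised outside the extractor class. A minimal sketch, assuming submit_traits is importable from terrautils.betydb as in the extractor above; the helper name run_bety_csv and the BETY_KEY environment variable are assumptions, not part of the original code.

import os

from terrautils.betydb import submit_traits  # assumed import path


def run_bety_csv(csv_path, bety_key=None):
    # Hypothetical helper: skip header-only CSVs, otherwise push traits to BETYdb.
    bety_key = bety_key or os.environ.get("BETY_KEY")
    with open(csv_path, 'r') as inputcsv:
        inputlines = inputcsv.readlines()
    if len(inputlines) <= 1:
        # Only the header line is present; nothing to submit.
        return 0
    submit_traits(csv_path, betykey=bety_key)
    return len(inputlines) - 1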
Example #2
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        successful_plots = 0
        with open(resource['local_paths'][0], 'rb') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                centroid_lonlat = [row['lon'], row['lat']]
                time_fmt = row['dp_time']
                timestamp = row['timestamp']
                dpmetadata = {
                    "source": row['source'],
                    "value": row['value']
                }
                trait = row['trait']

                create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                                   (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                                                   dpmetadata, timestamp)
                successful_plots += 1

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata (%s)" % resource['id'])
        ext_meta = build_metadata(host, self.extractor_info, resource['id'], {
            "plots_processed": successful_plots,
        }, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
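
The reader above expects one trait record per row with specific column names. A hedged sketch of producing a conforming CSV with csv.DictWriter; the field list matches the keys read above (and the header written by the Geostreams CSV writers later in this page), while the helper name is hypothetical.

import csv

GEO_FIELDS = ['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']


def write_geo_csv(out_path, rows):
    # rows: iterable of dicts keyed by GEO_FIELDS
    with open(out_path, 'wb') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=GEO_FIELDS)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)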
Example #3
def upload_to_geostreams(file, clowder_id):
    conn = Connector(
        None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    successful_plots = 0
    with open(file, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {"source": row['source'], "value": row['value']}
            trait = row['trait']

            create_datapoint_with_dependencies(
                conn, host, secret_key, trait,
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)
            successful_plots += 1

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.geostreams",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "Geostreams CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
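
upload_to_geostreams references host and secret_key (and upload_to_bety below references bety_key) without defining them, so they are presumably module-level configuration in the original script. A hedged sketch of how such globals might be supplied; the environment variable names are assumptions.

import os

# Assumed module-level configuration for the standalone uploader functions.
host = os.environ.get("CLOWDER_HOST", "")
secret_key = os.environ.get("CLOWDER_KEY", "")
bety_key = os.environ.get("BETYDB_KEY", "")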
Example #4
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        f = resource['local_paths'][0]

        self.log_info(resource, "determining image quality")
        qual = getImageQuality(f)

        self.log_info(resource, "creating output image")
        md = download_ds_metadata(connector, host, secret_key,
                                  resource['parent']['id'])
        terramd = get_terraref_metadata(md)
        if "left" in f:
            bounds = geojson_to_tuples(
                terramd['spatial_metadata']['left']['bounding_box'])
        else:
            bounds = geojson_to_tuples(
                terramd['spatial_metadata']['right']['bounding_box'])
        output = f.replace(".tif", "_nrmac.tif")
        create_geotiff(np.array([[qual, qual], [qual, qual]]), bounds, output)
        upload_to_dataset(connector, host, self.clowder_user,
                          self.clowder_pass, resource['parent']['id'], output)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"quality_score": qual}, 'file')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
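
The left/right branch above selects a bounding box from the stereo camera metadata. A small hedged helper capturing that selection; geojson_to_tuples and the spatial_metadata layout are taken from the code above, while the helper name is hypothetical.

def pick_stereo_bounds(terramd, filepath):
    # Choose the left or right camera bounding box based on the file name.
    side = "left" if "left" in filepath else "right"
    return geojson_to_tuples(terramd['spatial_metadata'][side]['bounding_box'])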
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Put files alongside .nc file
        out_dir = os.path.dirname(resource['local_paths'][0])
        out_fname_root = resource['name'].replace('.nc', '')

        metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.cdl')
        if not os.path.isfile(metaFilePath) or self.overwrite:
            logging.info('...extracting metadata in cdl format: %s' %
                         metaFilePath)
            with open(metaFilePath, 'w') as fmeta:
                subprocess.call(
                    ['ncks', '--cdl', '-m', '-M', resource['local_paths'][0]],
                    stdout=fmeta)
            self.created += 1
            self.bytes += os.path.getsize(metaFilePath)
            upload_to_dataset(connector, host, secret_key,
                              resource['parent']['id'], metaFilePath)

        metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.xml')
        if not os.path.isfile(metaFilePath) or self.overwrite:
            logging.info('...extracting metadata in xml format: %s' %
                         metaFilePath)
            with open(metaFilePath, 'w') as fmeta:
                subprocess.call(
                    ['ncks', '--xml', '-m', '-M', resource['local_paths'][0]],
                    stdout=fmeta)
            self.created += 1
            self.bytes += os.path.getsize(metaFilePath)
            upload_to_dataset(connector, host, secret_key,
                              resource['parent']['id'], metaFilePath)

        metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.json')
        if not os.path.isfile(metaFilePath) or self.overwrite:
            logging.info('...extracting metadata in json format: %s' %
                         metaFilePath)
            with open(metaFilePath, 'w') as fmeta:
                subprocess.call(
                    ['ncks', '--jsn', '-m', '-M', resource['local_paths'][0]],
                    stdout=fmeta)
            self.created += 1
            self.bytes += os.path.getsize(metaFilePath)
            upload_to_dataset(connector, host, secret_key,
                              resource['parent']['id'], metaFilePath)

            # Add json metadata to original netCDF file
            with open(metaFilePath, 'r') as metajson:
                metadata = build_metadata(host,
                                          self.extractor_info, resource['id'],
                                          json.load(metajson), 'dataset')
                upload_metadata(connector, host, secret_key,
                                resource['parent']['id'], metadata)

        self.end_message()
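
The three ncks invocations above differ only in the output flag and file suffix, so they can be driven by a small table. A hedged sketch of that loop; the ncks flags are the ones used above, and the helper name dump_nc_metadata is hypothetical.

import os
import subprocess

NCKS_FORMATS = [('--cdl', '_metadata.cdl'),
                ('--xml', '_metadata.xml'),
                ('--jsn', '_metadata.json')]


def dump_nc_metadata(nc_path, out_dir, out_fname_root):
    # Write one metadata dump per format next to the source .nc file.
    created = []
    for flag, suffix in NCKS_FORMATS:
        out_path = os.path.join(out_dir, out_fname_root + suffix)
        with open(out_path, 'w') as fmeta:
            subprocess.call(['ncks', flag, '-m', '-M', nc_path], stdout=fmeta)
        created.append(out_path)
    return created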
Example #6
def upload_to_bety(file, clowder_id):
    conn = Connector(
        None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    submit_traits(file, betykey=bety_key)

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.betydb",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "BETYdb CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(
        host, extractor_info, clowder_id, {
            "betydb_link":
            "https://terraref.ncsa.illinois.edu/bety/api/v1/variables?name=canopy_cover"
        }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
Example #7
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        logger = logging.getLogger(__name__)
        inputfile = resource["local_paths"][0]
        file_id = resource['id']

        if self._validate(inputfile):
            # set tags
            tags = {'tags': ['STNeeded', 'CKANNeeded']}
            rtags = {'tags': ['ValidationNeeded', 'ValidationFailed']}

            # set metadata
            metadata = self._make_metadata(inputfile)
            metadata = self.get_metadata(metadata, 'file', file_id, host)
            try:
                files.upload_metadata(connector, host, secret_key, file_id,
                                      metadata)
            except BaseException:
                return

        else:

            tags = {'tags': ['ValidationFailed']}
            rtags = {'tags': ['ValidationNeeded']}

        logger.debug('adding tags={}'.format(tags))
        files.upload_tags(connector, host, secret_key, file_id, tags)

        if rtags:
            logger.debug('removing tags={}'.format(rtags))
            headers = {'Content-Type': 'application/json'}
            url = '{}api/files/{}/tags?key={}'.format(host, file_id,
                                                      secret_key)
            connector.delete(
                url,
                headers=headers,
                data=json.dumps(rtags),
                verify=connector.ssl_verify if connector else True)
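
The tag-removal call above builds the Clowder endpoint URL by hand. A hedged helper wrapping that pattern; the URL format, headers, and connector.delete signature are copied from the code above, and the helper name remove_file_tags is an assumption.

import json


def remove_file_tags(connector, host, secret_key, file_id, tags):
    # Delete the given tags from a Clowder file via the tags endpoint.
    headers = {'Content-Type': 'application/json'}
    url = '{}api/files/{}/tags?key={}'.format(host, file_id, secret_key)
    connector.delete(url,
                     headers=headers,
                     data=json.dumps(tags),
                     verify=connector.ssl_verify if connector else True)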
Example #8
    def process_message(self, connector, host, secret_key, resource, parameters):
        inputfile = resource["local_paths"][0]
        file_id = resource['id']

        metadata = self.upload(inputfile)
        self.logger.debug(metadata)
        if metadata:
            metadata = self.get_metadata(metadata, 'file', file_id, host)
            self.logger.debug(metadata)

            # upload metadata
            files.upload_metadata(connector, host, secret_key, file_id, metadata)

            # set tags
            tags = {'tags': ['SensorThings']}
            files.upload_tags(connector, host, secret_key, file_id, tags)
            connector.status_update(StatusMessage.processing, {"type": "file", "id": file_id}, "Deleting file tags.")

            # delete tags
            headers = {'Content-Type': 'application/json'}
            url = '{}api/files/{}/tags?key={}'.format(host, file_id, secret_key)
            tags = {'tags': ['STNeeded']}
            connector.delete(url, headers=headers, data=json.dumps(tags), verify=connector.ssl_verify)
Example #9
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # rulechecker provided some key information for us in parameters
        if type(parameters) is str:
            parameters = json.loads(parameters)
        if 'parameters' in parameters:
            parameters = parameters['parameters']
        if type(parameters) is unicode:
            parameters = json.loads(str(parameters))
        dataset_name = parameters["output_dataset"]
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

        timestamp = dataset_name.split(" - ")[1]

        # Input path will suggest which sensor we are seeing
        sensor_name, sensor_lookup = None, None
        for f in resource['files']:
            if f['filepath'].find("rgb_geotiff") > -1:
                sensor_name = "stereoTop"
                sensor_lookup = "rgb_fullfield"
            elif f['filepath'].find("ir_geotiff") > -1:
                sensor_name = "flirIrCamera"
                sensor_lookup = "ir_fullfield"
            elif f['filepath'].find("laser3d_heightmap") > -1:
                sensor_name = "scanner3DTop"
                sensor_lookup = "laser3d_fullfield"
            if sensor_lookup is not None:
                break

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_name, {})
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output file paths
        out_tif_full = self.sensors.create_sensor_path(
            timestamp, sensor=sensor_lookup,
            opts=[scan_name]).replace(" ", "_")
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
        out_png = out_tif_full.replace(".tif", ".png")
        out_vrt = out_tif_full.replace(".tif", ".vrt")
        out_dir = os.path.dirname(out_vrt)

        # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

        # If outputs already exist, we don't need to do anything else
        found_all = True
        if self.thumb:
            output_files = [out_vrt, out_tif_thumb]
        else:
            output_files = [out_tif_full, out_tif_medium, out_png]
        for output_file in output_files:
            if not file_exists(output_file):
                found_all = False
                break
        if found_all and not self.overwrite:
            if self.thumb:
                self.log_info(
                    resource,
                    "thumb output already exists; triggering terra.geotiff.fieldmosaic_full"
                )
                r = requests.post(
                    "%sapi/%s/%s/extractions?key=%s" %
                    (host, 'datasets', resource['id'], secret_key),
                    headers={"Content-Type": "application/json"},
                    data=json.dumps({
                        "extractor": 'terra.geotiff.fieldmosaic_full',
                        "parameters": parameters
                    }))
                r.raise_for_status()
            else:
                self.log_skip(resource, "all outputs already exist")
            return

        # Perform actual field stitching
        if not self.darker or sensor_lookup != 'rgb_fullfield':
            (nu_created, nu_bytes) = self.generateSingleMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        else:
            (nu_created, nu_bytes) = self.generateDarkerMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        self.created += nu_created
        self.bytes += nu_bytes

        if not self.thumb and os.path.isfile(out_tif_medium):
            # Create PNG thumbnail
            self.log_info(resource, "Converting 10pct to %s..." % out_png)
            cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
            subprocess.call(cmd, shell=True)
            self.created += 1
            self.bytes += os.path.getsize(out_png)

        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s" %
            (season_name, experiment_name,
             self.sensors.get_display_name(sensor=sensor_lookup),
             timestamp[:4], timestamp[5:7]))

        # Get dataset ID or create it, creating parent collections as needed
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            self.sensors.get_display_name(sensor=sensor_lookup),
            timestamp[:4],
            timestamp[5:7],
            leaf_ds_name=dataset_name)

        # Upload full field image to Clowder
        content = {
            "comment":
            "This stitched image is computed based on an assumption that the scene is planar. \
                There are likely to be small offsets near the boundary of two images anytime there are plants \
                at the boundary (because those plants are higher than the ground plane), or where the dirt is \
                slightly higher or lower than average.",
            "file_ids": parameters["file_paths"]
        }

        # If we newly created these files, upload to Clowder
        if self.thumb:
            generated_files = [out_tif_thumb]
        else:
            generated_files = [out_tif_medium, out_tif_full, out_png]
        for checked_file in generated_files:
            if os.path.isfile(checked_file):
                found_in_dest = check_file_in_dataset(connector, host,
                                                      secret_key, target_dsid,
                                                      checked_file)
                #, replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
                if not found_in_dest:
                    id = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           checked_file)
                    meta = build_metadata(host, self.extractor_info, id,
                                          content, 'file')
                    upload_metadata(connector, host, secret_key, id, meta)

                    if checked_file == out_tif_full:
                        # Trigger downstream extractions on full resolution
                        if sensor_lookup == 'ir_fullfield':
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.multispectral.meantemp")
                        elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith(
                                "_mask.tif"):
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.stereo-rgb.canopycover")

        if self.thumb:
            # TODO: Add parameters support to pyclowder submit_extraction()
            self.log_info(resource,
                          "triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" %
                              (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({
                                  "extractor":
                                  'terra.geotiff.fieldmosaic_full',
                                  "parameters": parameters
                              }))
            r.raise_for_status()

        self.end_message(resource)
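
Both the early-exit path and the normal path above re-post the dataset to terra.geotiff.fieldmosaic_full with the same parameters payload (the TODO notes that pyclowder's submit_extraction does not yet accept parameters). A hedged helper for that POST; the URL format and payload are copied from the calls above, and the function name is hypothetical.

import json
import requests


def submit_extraction_with_parameters(host, secret_key, dataset_id, extractor, parameters):
    # POST directly to the extractions endpoint so extra parameters can be passed along.
    r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', dataset_id, secret_key),
                      headers={"Content-Type": "application/json"},
                      data=json.dumps({"extractor": extractor,
                                       "parameters": parameters}))
    r.raise_for_status()
    return r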
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Write the CSV to the same directory as the source file
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        time_fmt = timestamp + "T12:00:00-07:00"
        rootdir = self.sensors.create_sensor_path(timestamp,
                                                  sensor="rgb_fullfield",
                                                  ext=".csv")
        out_csv = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_canopycover_bety.csv"))
        out_geo = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_canopycover_geo.csv"))

        # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

        self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
        csv_file = open(out_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
        geo_file = open(out_geo, 'w')
        geo_file.write(','.join([
            'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
            'timestamp'
        ]) + '\n')

        # Get full list of experiment plots using date as filter
        all_plots = get_site_boundaries(timestamp, city='Maricopa')
        self.log_info(resource,
                      "found %s plots on %s" % (len(all_plots), timestamp))
        successful_plots = 0
        for plotname in all_plots:
            if plotname.find("KSU") > -1:
                self.log_info(resource, "skipping %s" % plotname)
                continue

            bounds = all_plots[plotname]
            tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]

            # Use GeoJSON string to clip full field to this plot
            try:
                pxarray = clip_raster(resource['local_paths'][0], tuples)
                if pxarray is not None:
                    if len(pxarray.shape) < 3:
                        self.log_error(
                            resource, "unexpected array shape for %s (%s)" %
                            (plotname, pxarray.shape))
                        continue

                    ccVal = calculate_canopycover_masked(
                        rollaxis(pxarray, 0, 3))

                    if (ccVal > -1):
                        # Prepare and submit datapoint
                        geo_file.write(','.join([
                            plotname, 'Canopy Cover',
                            str(centroid_lonlat[1]),
                            str(centroid_lonlat[0]), time_fmt, host +
                            ("" if host.endswith("/") else "/") + "files/" +
                            resource['id'],
                            str(ccVal), timestamp
                        ]) + '\n')

                    successful_plots += 1
                    if successful_plots % 10 == 0:
                        self.log_info(
                            resource, "processed %s/%s plots" %
                            (successful_plots, len(all_plots)))
                else:
                    continue
            except:
                self.log_error(resource,
                               "error generating cc for %s" % plotname)
                continue

            if (ccVal > -1):
                traits['canopy_cover'] = str(ccVal)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                csv_file.write(','.join(map(str, trait_list)) + '\n')

        csv_file.close()
        geo_file.close()

        # Upload this CSV to Clowder
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, resource['parent']['id'],
                                   out_csv)
        geoid = upload_to_dataset(connector, host, self.clowder_user,
                                  self.clowder_pass, resource['parent']['id'],
                                  out_geo)

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata")
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": [fileid, geoid]}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        # Trigger separate extractors
        self.log_info(resource, "triggering BETY extractor on %s" % fileid)
        submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
        self.log_info(resource,
                      "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid,
                          "terra.geostreams")

        self.end_message(resource)
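
The BETY CSV above is built by mutating the traits dict from get_traits_table() and serializing it with generate_traits_list(). A hedged sketch of that per-plot step as a helper; both functions and the trait keys are taken from the code above, while the helper name is hypothetical.

def write_canopycover_trait(csv_file, traits, plotname, timestamp, cc_val):
    # Fill in the per-plot trait fields and append one BETY CSV row.
    traits['canopy_cover'] = str(cc_val)
    traits['site'] = plotname
    traits['local_datetime'] = timestamp + "T12:00:00"
    trait_list = generate_traits_list(traits)
    csv_file.write(','.join(map(str, trait_list)) + '\n')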
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context(
        )

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # Make sure if we have the column name of plot-names specified that it exists in
                # the shapefile
                column_names = shape_table.field_names
                if plot_name_idx is not None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            "Shapefile data does not have specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                            (one_name.find('name') >= 0)
                                or one_name.find('id')):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Check for geographic overlap and skip if there is none
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate system before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and determine overwrite
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if (file_exists(out_file) and not self.overwrite):
                        # The file exists and don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment":
                            "Clipped from shapefile " +
                            os.path.basename(shapefile),
                            "imageName":
                            image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if not sensor_old_base is None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
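
The plot loop above relies on a standard OGR iteration pattern: open the shapefile, walk its features, and intersect each geometry with an image footprint. A stripped-down hedged sketch of that pattern; the OGR calls mirror the ones used above, and the generator name is hypothetical.

import os

from osgeo import ogr


def iter_plot_polygons(shapefile):
    # Yield a copy of each feature's geometry, with the layer's spatial reference attached.
    shape_in = ogr.Open(shapefile)
    layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
    layer_ref = layer.GetSpatialRef()
    feature = layer.GetNextFeature()
    while feature:
        poly = feature.GetGeometryRef().Clone()
        if layer_ref:
            poly.AssignSpatialReference(layer_ref)
        yield poly
        feature = layer.GetNextFeature()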
Example #12
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        if type(parameters) is str:
            parameters = json.loads(parameters)
        if 'parameters' in parameters:
            parameters = parameters['parameters']
        if type(parameters) is unicode:
            parameters = json.loads(str(parameters))

        # Input path will suggest which sensor we are seeing
        sensor_type = None
        for f in resource['files']:
            filepath = f['filepath']
            for sens in ["rgb_geotiff", "ir_geotiff", "laser3d_heightmap"]:
                if filepath.find(sens) > -1:
                    sensor_type = sens.split("_")[0]
                    break
            if sensor_type is not None:
                break

        # dataset_name = "Full Field - 2017-01-01"
        dataset_name = parameters["output_dataset"]
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""
        timestamp = dataset_name.split(" - ")[1]

        out_tif_full = self.sensors.create_sensor_path(
            timestamp, opts=[sensor_type, scan_name])
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_vrt = out_tif_full.replace(".tif", ".vrt")
        out_dir = os.path.dirname(out_vrt)

        if os.path.exists(out_vrt) and not self.overwrite:
            self.log_skip(resource,
                          "%s already exists; ending process" % out_vrt)
            return

        if not self.darker or sensor_type != 'rgb':
            (nu_created, nu_bytes) = self.generateSingleMosaic(
                connector, host, secret_key, sensor_type, out_dir, out_vrt,
                out_tif_thumb, out_tif_full, parameters, resource)
        else:
            (nu_created, nu_bytes) = self.generateDarkerMosaic(
                connector, host, secret_key, sensor_type, out_dir, out_vrt,
                out_tif_thumb, out_tif_full, parameters, resource)
        self.created += nu_created
        self.bytes += nu_bytes

        # Get dataset ID or create it, creating parent collections as needed
        target_dsid = build_dataset_hierarchy(host,
                                              secret_key,
                                              self.clowder_user,
                                              self.clowder_pass,
                                              self.clowderspace,
                                              self.sensors.get_display_name(),
                                              timestamp[:4],
                                              timestamp[5:7],
                                              leaf_ds_name=dataset_name)

        # Upload full field image to Clowder
        content = {
            "comment":
            "This stitched image is computed based on an assumption that the scene is planar. \
                There are likely to be small offsets near the boundary of two images anytime there are plants \
                at the boundary (because those plants are higher than the ground plane), or where the dirt is \
                slightly higher or lower than average.",
            "file_ids": parameters["file_paths"]
        }

        if os.path.exists(out_tif_thumb):
            thumbid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, target_dsid,
                                        out_tif_thumb)
            thumbmeta = build_metadata(host, self.extractor_info, thumbid,
                                       content, 'file')
            upload_metadata(connector, host, secret_key, thumbid, thumbmeta)

        if os.path.exists(out_tif_full):
            fullid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       out_tif_full)
            fullmeta = build_metadata(host, self.extractor_info, fullid,
                                      content, 'file')
            upload_metadata(connector, host, secret_key, fullid, fullmeta)

        self.end_message(resource)
Example #13
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        stream_name = 'Energy Farm Observations'
        disp_name = self.sensors.get_display_name()
        if 'Weather CEN' in resource['name']:
            curr_sens = disp_name + ' - CEN'
            stream_name += ' CEN'
            main_coords = [-88.199801, 40.062051, 0]
        elif 'WeatherNE' in resource['name']:
            curr_sens = disp_name + ' - NE'
            stream_name += ' NE'
            main_coords = [-88.193298, 40.067379, 0]
        elif 'WeatherSE' in resource['name']:
            curr_sens = disp_name + ' - SE'
            stream_name += ' SE'
            main_coords = [-88.193573, 40.056910, 0]
        geom = {"type": "Point", "coordinates": main_coords}

        # Get sensor or create if not found
        sensor_data = get_sensor_by_name(connector, host, secret_key,
                                         curr_sens)
        if not sensor_data:
            sensor_id = create_sensor(connector, host, secret_key, curr_sens,
                                      geom, {
                                          "id": "Met Station",
                                          "title": "Met Station",
                                          "sensorType": 4
                                      }, "Urbana")
        else:
            sensor_id = sensor_data['id']

        # Get stream or create if not found
        stream_data = get_stream_by_name(connector, host, secret_key,
                                         stream_name)
        if not stream_data:
            stream_id = create_stream(connector, host, secret_key, stream_name,
                                      sensor_id, geom)
        else:
            stream_id = stream_data['id']

        # Check existing metadata for the last processed time, and resume processing the file after that point
        allmd = download_metadata(connector, host, secret_key, resource['id'])
        last_processed_time = 0
        datapoint_count = 0
        for md in allmd:
            if 'content' in md and 'last processed time' in md['content']:
                last_processed_time = md['content']['last processed time']
                if 'datapoints_created' in md['content']:
                    datapoint_count = md['content']['datapoints_created']
                else:
                    datapoint_count = 0
                delete_metadata(connector, host, secret_key, resource['id'],
                                md['agent']['name'].split("/")[-1])

        # Parse file and get all the records in it.
        ISO_8601_UTC_OFFSET = dateutil.tz.tzoffset("-07:00", -7 * 60 * 60)
        records = parse_file(resource["local_paths"][0],
                             last_processed_time,
                             utc_offset=ISO_8601_UTC_OFFSET)
        # Add props to each record.
        for record in records:
            record['properties']['source_file'] = resource['id']
            record['stream_id'] = str(stream_id)

        total_dp = 0
        datapoint_list = []
        for record in records:
            datapoint_list.append({
                "start_time": record['start_time'],
                "end_time": record['end_time'],
                "type": "Point",
                "geometry": record['geometry'],
                "properties": record['properties']
            })
            if len(datapoint_list) > self.batchsize:
                create_datapoints(connector, host, secret_key, stream_id,
                                  datapoint_list)
                total_dp += len(datapoint_list)
                datapoint_list = []
        if len(datapoint_list) > 0:
            create_datapoints(connector, host, secret_key, stream_id,
                              datapoint_list)
            total_dp += len(datapoint_list)

        # Mark dataset as processed
        metadata = build_metadata(
            host, self.extractor_info, resource['id'], {
                "last processed time": records[-1]["end_time"],
                "datapoints_created": datapoint_count + total_dp
            }, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message()
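
The weather and irrigation extractors both accumulate datapoints and flush them to the Geostreams API in batches. A hedged sketch of that flush pattern as a standalone helper; create_datapoints and the record fields are the same ones used above, and the batch_size default is an assumption.

def post_datapoints_in_batches(connector, host, secret_key, stream_id, records, batch_size=100):
    # Buffer datapoints and flush whenever the buffer exceeds batch_size.
    total_dp = 0
    datapoint_list = []
    for record in records:
        datapoint_list.append({
            "start_time": record['start_time'],
            "end_time": record['end_time'],
            "type": "Point",
            "geometry": record['geometry'],
            "properties": record['properties']
        })
        if len(datapoint_list) > batch_size:
            create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
            total_dp += len(datapoint_list)
            datapoint_list = []
    if datapoint_list:
        create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
        total_dp += len(datapoint_list)
    return total_dp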
Example #14
    def perform_uploads(self, connector, host, secret_key, resource,
                        default_dsid, content, season_name, experiment_name,
                        timestamp):
        """Perform the uploading of all the files we're put onto the upload list

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            default_dsid(str): the default dataset to load files to
            content(str): content information for the files we're uploading
            season_name(str): the name of the season
            experiment_name(str): the name of the experiment
            timestamp(str): the timestamp string associated with the source dataset

        Notes:
            We loop through the files, compressing, and remapping the names as needed.
            If the sensor associated with the file is missing, we upload the file to
            the default dataset. Otherwise, we use the dataset associated with the sensor
            and create the dataset if necessary
        """
        for one_file in self.files_to_upload:
            sourcefile = os.path.join(one_file["source_path"],
                                      one_file["source_name"])

            # Make sure we have the original file, then compress it if needed or rename it
            if os.path.isfile(sourcefile):
                # make sure we have the full destination path
                if not os.path.exists(one_file["dest_path"]):
                    os.makedirs(one_file["dest_path"])

                resultfile = os.path.join(one_file["dest_path"],
                                          one_file["dest_name"])
                if one_file["compress"]:
                    resultfile = resultfile + ".zip"
                    with open(sourcefile, 'rb') as f_in:
                        with gzip.open(resultfile, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                elif not sourcefile == resultfile:
                    shutil.move(sourcefile, resultfile)

                # Find or create the target dataset for this entry if it doesn't exist
                cur_dataset_id = default_dsid
                if "sensor" in one_file:
                    sensor_type = one_file["sensor"]
                    if sensor_type in self.sensor_dsid_map:
                        cur_dataset_id = self.sensor_dsid_map[sensor_type]
                    else:
                        new_sensor = Sensors(base=self.sensors.base,
                                             station=self.sensors.station,
                                             sensor=sensor_type)

                        sensor_leaf_name = new_sensor.get_display_name(
                        ) + ' - ' + timestamp
                        ds_exists = get_datasetid_by_name(
                            host, secret_key, sensor_leaf_name)
                        new_dsid = build_dataset_hierarchy_crawl(
                            host,
                            secret_key,
                            self.clowder_user,
                            self.clowder_pass,
                            self.clowderspace,
                            season_name,
                            experiment_name,
                            new_sensor.get_display_name(),
                            timestamp[:4],
                            timestamp[5:7],
                            timestamp[8:10],
                            leaf_ds_name=sensor_leaf_name)

                        if (self.overwrite_ok
                                or not ds_exists) and self.experiment_metadata:
                            self.update_dataset_extractor_metadata(
                                connector, host, secret_key, new_dsid,
                                prepare_pipeline_metadata(
                                    self.experiment_metadata),
                                self.extractor_info['name'])

                        self.sensor_dsid_map[sensor_type] = new_dsid
                        cur_dataset_id = new_dsid

                # Check if file already exists in the dataset
                file_in_dataset = check_file_in_dataset(connector,
                                                        host,
                                                        secret_key,
                                                        cur_dataset_id,
                                                        resultfile,
                                                        remove=False)

                # If the file is already in the dataset, determine if we need to delete it first
                if self.overwrite_ok and file_in_dataset:
                    # Delete the file from the dataset before uploading the new copy
                    self.log_info(
                        resource,
                        "Removing existing file in dataset " + resultfile)
                    check_file_in_dataset(connector,
                                          host,
                                          secret_key,
                                          cur_dataset_id,
                                          resultfile,
                                          remove=True)
                elif not self.overwrite_ok and file_in_dataset:
                    # We won't overwrite an existing file
                    self.log_skip(
                        resource, "Not overwriting existing file in dataset " +
                        resultfile)
                    continue

                # Upload the file to the dataset
                fid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, cur_dataset_id,
                                        resultfile)

                # Generate our metadata
                meta = build_metadata(host, self.extractor_info, fid, content,
                                      'file')

                # Upload the metadata to the uploaded file
                upload_metadata(connector, host, secret_key, fid, meta)

                self.created += 1
                self.bytes += os.path.getsize(resultfile)
            else:
                raise Exception("%s was not found" % sourcefile)
Example #15
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # TODO: Get this from Clowder fixed metadata
        main_coords = [-111.974304, 33.075576, 361]
        geom = {"type": "Point", "coordinates": main_coords}
        disp_name = self.sensors.get_display_name()

        # Get sensor or create if not found
        sensor_data = get_sensor_by_name(connector, host, secret_key,
                                         disp_name)
        if not sensor_data:
            sensor_id = create_sensor(
                connector, host, secret_key, disp_name, geom, {
                    "id": "MAC Met Station",
                    "title": "MAC Met Station",
                    "sensorType": 4
                }, "Maricopa")
        else:
            sensor_id = sensor_data['id']

        # Get stream or create if not found
        stream_name = "Irrigation Observations"
        stream_data = get_stream_by_name(connector, host, secret_key,
                                         stream_name)
        if not stream_data:
            stream_id = create_stream(connector, host, secret_key, stream_name,
                                      sensor_id, geom)
        else:
            stream_id = stream_data['id']

        # Process records in file
        records = parse_file(resource["local_paths"][0], main_coords)
        total_dp = 0
        datapoint_list = []
        for record in records:
            record['properties']['source_file'] = resource['id']
            datapoint_list.append({
                "start_time": record['start_time'],
                "end_time": record['end_time'],
                "type": "Point",
                "geometry": record['geometry'],
                "properties": record['properties']
            })
            if len(datapoint_list) > self.batchsize:
                create_datapoints(connector, host, secret_key, stream_id,
                                  datapoint_list)
                total_dp += len(datapoint_list)
                datapoint_list = []
        if len(datapoint_list) > 0:
            create_datapoints(connector, host, secret_key, stream_id,
                              datapoint_list)
            total_dp += len(datapoint_list)

        # Mark dataset as processed
        metadata = build_metadata(host, self.extractor_info, resource['id'],
                                  {"datapoints_created": len(records)}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message(resource)
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        time_fmt = timestamp + "T12:00:00-07:00"
        rootdir = self.sensors.create_sensor_path(timestamp,
                                                  sensor="ir_meantemp",
                                                  ext=".csv")
        out_csv = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_meantemp_bety.csv"))
        out_geo = os.path.join(
            os.path.dirname(rootdir),
            resource['name'].replace(".tif", "_meantemp_geo.csv"))

        self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
        csv_file = open(out_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
        geo_file = open(out_geo, 'w')
        geo_file.write(','.join([
            'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
            'timestamp'
        ]) + '\n')

        successful_plots = 0
        nan_plots = 0
        all_plots = get_site_boundaries(timestamp, city='Maricopa')
        for plotname in all_plots:
            if plotname.find("KSU") > -1:
                self.log_info(resource, "skipping %s" % plotname)
                continue

            bounds = all_plots[plotname]
            tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]

            # Use GeoJSON string to clip full field to this plot
            pxarray = clip_raster(resource['local_paths'][0], tuples,
                                  "/home/extractor/temp.tif")
            os.remove("/home/extractor/temp.tif")

            # Filter out any negative (invalid) values before averaging
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                geo_file.write(','.join([
                    plotname, 'IR Surface Temperature',
                    str(centroid_lonlat[1]),
                    str(centroid_lonlat[0]), time_fmt, host +
                    ("" if host.endswith("/") else "/") + "files/" +
                    resource['id'],
                    str(mean_tc), timestamp
                ]) + '\n')

                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                csv_file.write(','.join(map(str, trait_list)) + '\n')
            else:
                nan_plots += 1

            successful_plots += 1

        self.log_info(
            resource,
            "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

        # submit CSV to BETY
        csv_file.close()
        geo_file.close()

        # Upload CSVs to Clowder
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, resource['parent']['id'],
                                   out_csv)
        geoid = upload_to_dataset(connector, host, self.clowder_user,
                                  self.clowder_pass, resource['parent']['id'],
                                  out_geo)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        self.log_info(resource, "updating file metadata")
        metadata = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "total_plots":
                len(all_plots),
                "plots_processed":
                successful_plots,
                "blank_plots":
                nan_plots,
                "files_created": [fileid, geoid],
                "betydb_link":
                "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        # Trigger downstream extractors
        self.log_info(resource, "triggering BETY extractor on %s" % fileid)
        submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
        self.log_info(resource,
                      "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid,
                          "terra.geostreams")

        self.end_message(resource)
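
The per-plot temperature above is a NaN-aware mean of the clipped pixel array, converted from Kelvin to Celsius. A hedged sketch of that calculation on its own; numpy.nanmean and the negative-value mask are taken from the code above, and the function name is hypothetical.

import numpy


def mean_plot_temperature(pxarray):
    # Mask invalid (negative) pixels, then average and convert from Kelvin to Celsius.
    px = pxarray.astype(float)
    px[px < 0] = numpy.nan
    return numpy.nanmean(px) - 273.15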