def upload_ds(conn, host, key, sensor, date, timestamp, ds_files, ds_meta):
    if len(ds_files) > 0:
        year, month, dd = date.split("-")
        if DRY_RUN:
            log("[%s] %s files" % (sensor+' - '+timestamp, len(ds_files)))
            return

        if TIMESTAMP_FOLDER:
            dataset_id = build_dataset_hierarchy(CONN, CLOWDER_HOST, CLOWDER_KEY, SPACE_ID,
                                                 sensor, year, month, dd, sensor+' - '+timestamp)
        else:
            dataset_id = build_dataset_hierarchy(CONN, CLOWDER_HOST, CLOWDER_KEY, SPACE_ID,
                                                 sensor, year, month, leaf_ds_name=sensor+' - '+date)

        log("adding files to Clowder dataset %s" % dataset_id)

        for filepath in ds_files:
            upload_to_dataset(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id, filepath)
        if len(ds_meta.keys()) > 0:
            log("adding metadata to Clowder dataset %s" % dataset_id)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": ds_meta,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/58e2a7b9fe3ae3efc1632ae8"
                }
            }
            upload_metadata(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id, format_md)
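
A minimal invocation sketch for upload_ds (not from the original source): the sensor name, date, timestamp, file paths, and metadata below are illustrative, and the CONN, CLOWDER_HOST, CLOWDER_KEY, SPACE_ID, DRY_RUN, and TIMESTAMP_FOLDER globals are assumed to be configured elsewhere in the module.

# Illustrative only -- the values and paths are assumptions, not part of the original script.
ds_files = ["/data/stereoTop/2017-05-01/left.tif",
            "/data/stereoTop/2017-05-01/right.tif"]
ds_meta = {"sensor_variable_metadata": {"source": "cleaned gantry metadata"}}
upload_ds(CONN, CLOWDER_HOST, CLOWDER_KEY,
          sensor="stereoTop",
          date="2017-05-01",
          timestamp="2017-05-01__10-00-00-000",
          ds_files=ds_files,
          ds_meta=ds_meta)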
Example No. 2
    def update_dataset_extractor_metadata(self, connector, host, key, dsid, metadata,\
                                          extractor_name):
        """Adds or replaces existing dataset metadata for the specified extractor

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            key(str): used with the host API
            dsid(str): the dataset to update
            metadata(str): the metadata string to update the dataset with
            extractor_name(str): the name of the extractor to associate the metadata with
        """
        meta = build_metadata(host, self.extractor_info, dsid, metadata,
                              "dataset")

        try:
            md = ds.download_metadata(connector, host, key, dsid,
                                      extractor_name)
            md_len = len(md)
        except Exception as ex:  # pylint: disable=broad-except
            md_len = 0
            logging.debug(str(ex))

        if md_len > 0:
            ds.remove_metadata(connector, host, key, dsid, extractor_name)

        ds.upload_metadata(connector, host, key, dsid, meta)
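
A brief usage sketch of the method above, assumed to be called from another method of the same extractor; the dataset id and metadata values are illustrative.

        # Usage sketch (illustrative; not part of the original source). Replaces any
        # metadata previously attached to this dataset by the same extractor.
        new_md = {"files_created": ["full_field_mosaic.tif"]}
        self.update_dataset_extractor_metadata(connector, host, secret_key,
                                               dsid=resource['parent']['id'],
                                               metadata=new_md,
                                               extractor_name=self.extractor_info['name'])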
Example No. 3
    def process_message_individual(self, connector, host, secret_key, resource,
                                   parameters):
        """This is deprecated method that operates on single capture, not field mosaic"""
        self.start_message()

        input_image = resource['local_paths'][0]

        # Create output in same directory as input, but check name
        ds_md = get_info(connector, host, secret_key, resource['parent']['id'])
        terra_md = get_terraref_metadata(
            download_metadata(connector, host, secret_key,
                              resource['parent']['id']), 'stereoTop')
        dataset_name = ds_md['name']
        timestamp = dataset_name.split(" - ")[1]

        # Is this left or right half?
        side = 'left' if resource['name'].find("_left") > -1 else 'right'
        gps_bounds = geojson_to_tuples(
            terra_md['spatial_metadata'][side]['bounding_box'])
        out_csv = self.sensors.create_sensor_path(timestamp,
                                                  opts=[side],
                                                  ext='csv')
        out_dgci = out_csv.replace(".csv", "_dgci.png")
        out_edge = out_csv.replace(".csv", "_edge.png")
        out_label = out_csv.replace(".csv", "_label.png")
        out_dgci_tif = out_dgci.replace('.png', '.tif')
        out_edge_tif = out_edge.replace('.png', '.tif')
        out_label_tif = out_label.replace('.png', '.tif')

        self.generate_all_outputs(input_image, out_csv, out_dgci, out_edge,
                                  out_label, gps_bounds)

        fileids = []
        for file_to_upload in [
                out_csv, out_dgci_tif, out_edge_tif, out_label_tif
        ]:
            if os.path.isfile(file_to_upload):
                if file_to_upload not in resource['local_paths']:
                    # TODO: Should this be written to a separate dataset?
                    #target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                    #                                      self.sensors.get_display_name(),
                    #                                      timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=dataset_name)

                    # Send output to Clowder source dataset
                    fileids.append(
                        upload_to_dataset(connector, host, secret_key,
                                          resource['parent']['id'],
                                          file_to_upload))
                self.created += 1
                self.bytes += os.path.getsize(file_to_upload)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(host, self.extractor_info,
                                  resource['parent']['id'],
                                  {"files_created": fileids}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        inPath = resource['local_paths'][0]

        # Determine output file path
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        out_file = self.create_sensor_path(timestamp,
                                           opts=['extracted_values'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            connector,
            host,
            secret_key,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[:7],
            timestamp[:10],
            leaf_ds_name=resource['dataset_info']['name'])

        # Extract NDVI values
        if not os.path.isfile(out_file) or self.overwrite:
            logging.info("...writing values to: %s" % out_file)
            # Read the raw data file once; the NDVI value occupies a fixed byte range
            with open(inPath, "rb") as bin_file:
                data = bin_file.read()
            values = float(data[49:66])
            with open(out_file, 'wb') as csvfile:
                fields = ['file_name', 'NDVI']  # fields name for csv file
                wr = csv.DictWriter(csvfile,
                                    fieldnames=fields,
                                    lineterminator='\n')
                wr.writeheader()
                wr.writerow({'file_name': resource['name'], 'NDVI': values})

            # TODO: Send this to geostreams

            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, out_file)
            uploaded_file_ids.append(fileid)

            self.created += 1
            self.bytes += os.path.getsize(out_file)
        else:
            logging.info("%s already exists; skipping %s" %
                         (out_file, resource['id']))

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, metadata)

        self.end_message()
Example No. 5
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg',
                                                  'frameIndex.txt', 'settings.txt']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_fullname = 'swir_netcdf'
        else:
            sensor_fullname = 'vnir_netcdf'

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                out_nc = self.sensors.get_sensor_path(timestamp,
                                                      sensor=sensor_fullname)
                if file_exists(out_nc):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            # See if we can recover it from disk
            if sensor_fullname == 'vnir_netcdf':
                date = timestamp.split("__")[0]
                source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
                    date, timestamp)
                for f in os.listdir(source_dir):
                    if f.endswith("_metadata.json"):
                        self.log_info(resource,
                                      "updating metadata from %s" % f)
                        raw_dsmd = load_json_file(os.path.join(source_dir, f))
                        clean_md = clean_metadata(raw_dsmd, 'VNIR')
                        complete_md = build_metadata(host, self.extractor_info,
                                                     resource['id'], clean_md,
                                                     'dataset')
                        remove_metadata(connector, host, secret_key,
                                        resource['id'])
                        upload_metadata(connector, host, secret_key,
                                        resource['id'], complete_md)
                        return CheckMessage.download
            return CheckMessage.ignore
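
For orientation, a stripped-down check_message sketch showing the same CheckMessage protocol used above (download asks pyclowder to fetch the dataset and call process_message, ignore skips it); the required-files list here is an illustrative assumption, not the original extractor's logic.

    # Minimal sketch only -- not the original extractor.
    def check_message(self, connector, host, secret_key, resource, parameters):
        # Only queue datasets that contain every raw file this extractor needs
        if not contains_required_files(resource, ['raw', 'raw.hdr']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore
        return CheckMessage.download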
Example No. 6
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message()

        for p in resource['local_paths']:
            if p.endswith(".bin"):
                input_dir = p.replace(os.path.basename(p), '')
                # TODO: Eventually light may be in separate location
                input_dir_light = input_dir

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        out_name_base = self.sensors.create_sensor_path(timestamp, ext='')
        uploaded_file_ids = []

        # Run the Octave PSII routine over the raw input directories
        subprocess.call("octave --eval \"PSII('%s','%s','%s')\"" %
                        (input_dir, input_dir_light, out_name_base), shell=True)

        target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                              self.sensors.get_display_name(), timestamp[:4], timestamp[:7],
                                              timestamp[:10], leaf_ds_name=resource['dataset_info']['name'])

        for out_file in ["_Fm_dark", "_Fv_dark", "_FvFm_dark", "_Fm_light", "_Fv_light", "_FvFm_light",
                         "_Phi_PSII", "_NPQ", "_qN", "_qP", "_Rfd"]:
            full_out_name = out_name_base + out_file + ".png"
            if os.path.isfile(full_out_name):
                if full_out_name not in resource["local_paths"]:
                    fileid = upload_to_dataset(connector, host, secret_key, target_dsid, full_out_name)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(full_out_name)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message()
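
The same Octave call could be issued without a shell; a sketch under the assumption that the octave binary is on PATH, using the same variables as the method above. Passing an argument list avoids manual quoting of the directory paths, and check_call raises if Octave exits with a non-zero status.

# Shell-free alternative sketch (illustrative; not part of the original extractor).
octave_expr = "PSII('%s','%s','%s')" % (input_dir, input_dir_light, out_name_base)
subprocess.check_call(["octave", "--eval", octave_expr])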
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get bin files and metadata
        metadata = None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, "ps2Top")
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
                metadata = load_json_file(f)
        frames = {}
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            for f in resource['local_paths']:
                if f.endswith(format_ind+'.bin'):
                    frames[ind] = f
        if metadata is None or len(frames) < 101:
            self.log_error(resource, 'could not find all frames and/or metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
        coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)

        (img_width, img_height) = self.get_image_dimensions(metadata)
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])

        self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

        png_frames = {}
        # skip 0101.bin since 101 is an XML file that lists the frame times
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
            tif_path = png_path.replace(".png", ".tif")
            png_frames[ind] = png_path
            if not os.path.exists(png_path) or self.overwrite:
                self.log_info(resource, "generating and uploading %s" % png_path)
                pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
                create_image(pixels, png_path)
                create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)

                if png_path not in resource['local_paths']:
                    fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(png_path)

        # Generate aggregate outputs
        self.log_info(resource, "generating aggregates")
        if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
            # TODO: Coerce histogram and pseudocolor to geotiff?
            self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
            self.created += 2
            self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
        if hist_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
            uploaded_file_ids.append(fileid)
        if coloredImg_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
            uploaded_file_ids.append(fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid, {
                                  "files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message(resource)
Example No. 8
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        time_fmt = timestamp+"T12:00:00-07:00"
        out_csv = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", opts=["bety"])
        out_geo = out_csv.replace("_bety", "_geo")

        # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

        self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
        csv_file = open(out_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
        geo_file = open(out_geo, 'w')
        geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp']) + '\n')

        successful_plots = 0
        nan_plots = 0
        all_plots = get_site_boundaries(timestamp, city='Maricopa')
        for plotname in all_plots:
            if plotname.find("KSU") > -1:
                self.log_info(resource, "skipping %s" % plotname)
                continue

            bounds = all_plots[plotname]
            tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
            centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

            # Use GeoJSON string to clip full field to this plot
            pxarray = clip_raster(resource['local_paths'][0], tuples)

            # Mask out negative (invalid) pixels, then take the mean and convert Kelvin to Celsius
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                geo_file.write(','.join([plotname,
                                         'IR Surface Temperature',
                                         str(centroid_lonlat[1]),
                                         str(centroid_lonlat[0]),
                                         time_fmt,
                                         host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                         str(mean_tc),
                                         timestamp]) + '\n')

                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp+"T12:00:00"
                trait_list = generate_traits_list(traits)
                csv_file.write(','.join(map(str, trait_list)) + '\n')
            else:
                nan_plots += 1

            successful_plots += 1

        self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

        # submit CSV to BETY
        csv_file.close()
        geo_file.close()

        # Upload CSVs to Clowder
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['parent']['id'], out_csv)
        geoid  = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['parent']['id'], out_geo)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        self.log_info(resource, "updating file metadata")
        metadata = build_metadata(host, self.extractor_info, resource['parent']['id'], {
            "total_plots": len(all_plots),
            "plots_processed": successful_plots,
            "blank_plots": nan_plots,
            "files_created": [fileid, geoid],
            "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
        }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

        # Trigger downstream extractors
        self.log_info(resource, "triggering BETY extractor on %s" % fileid)
        submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
        self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
        submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

        self.end_message(resource)
Example No. 9
    def process_message(self, connector, host, secret_key, resource, parameters):

        super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key,
                                                             resource, parameters)

        self.start_message(resource)

        # Get left/right files and metadata
        process_files = []
        if self.get_terraref_metadata is not None:
            process_files = find_terraref_files(resource)
        else:
            process_files = find_image_files(self.args.identify_binary, resource,
                                             self.file_infodata_file_ending)

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
                                    self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
                                                           self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        # Prepare for processing files
        timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
        target_dsid = resource['id']
        uploaded_file_ids = []
        ratios = []

        try:
            for one_file in process_files:

                mask_source = one_file

                # Make sure the source image is in the correct EPSG space
                epsg = get_epsg(one_file)
                if epsg != self.default_epsg:
                    self.log_info(resource, "Reprojecting from " + str(epsg) +
                                  " to default " + str(self.default_epsg))
                    _, tmp_name = tempfile.mkstemp()
                    src = gdal.Open(one_file)
                    gdal.Warp(tmp_name, src, dstSRS='EPSG:'+str(self.default_epsg))
                    mask_source = tmp_name

                # Get the bounds of the image to see if we can process it. Also get the mask filename
                rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

                if bounds is None:
                    self.log_skip(resource, "Skipping non-georeferenced image: " + \
                                                                    os.path.basename(one_file))
                    if mask_source != one_file:
                        os.remove(mask_source)
                    continue

                if not file_exists(rgb_mask_tif) or self.overwrite:
                    self.log_info(resource, "creating %s" % rgb_mask_tif)

                    mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                    ratios.append(mask_ratio)

                    # Bands must be reordered to avoid swapping R and B
                    mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)

                    create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info,
                                   self.get_terraref_metadata)
                    compress_geotiff(rgb_mask_tif)

                    # Remove any temporary file
                    if mask_source != one_file:
                        os.remove(mask_source)

                    self.created += 1
                    self.bytes += os.path.getsize(rgb_mask_tif)

                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      rgb_mask_tif, remove=self.overwrite)
                if not found_in_dest:
                    self.log_info(resource, "uploading %s" % rgb_mask_tif)
                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, rgb_mask_tif)
                    uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                             "files/" + fileid)

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            if self.get_terraref_metadata is not None:
                ratios_len = len(ratios)
                left_ratio = (ratios[0] if ratios_len > 0 else None)
                right_ratio = (ratios[1] if ratios_len > 1 else None)
                md = {
                    "files_created": uploaded_file_ids
                }
                if left_ratio is not None:
                    md["left_mask_ratio"] = left_ratio
                if not self.leftonly and right_ratio is not None:
                    md["right_mask_ratio"] = right_ratio
                extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
                self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
                remove_metadata(connector, host, secret_key, resource['id'],
                                self.extractor_info['name'])
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # Make sure if we have the column name of plot-names specified that it exists in
                # the shapefile
                column_names = shape_table.field_names
                if plot_name_idx is not None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            "Shapefile data does not have the specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                                (one_name.find('name') >= 0)
                                or (one_name.find('id') >= 0)):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Check for geographic overlap and skip the image if there is none
                    intersection = None
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate systems before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection is None or intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and determine overwrite
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if (file_exists(out_file) and not self.overwrite):
                        # The file exists and don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                            "imageName": image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
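
A minimal pyclowder extractor skeleton, sketched here for orientation only, showing how check_message and process_message hooks like the ones above are typically wired up; the class name and hook bodies are illustrative assumptions, not the original extractor.

# Illustrative skeleton only -- not the original extractor.
from pyclowder.extractors import Extractor
from pyclowder.utils import CheckMessage


class ExamplePlotExtractor(Extractor):
    def __init__(self):
        Extractor.__init__(self)
        self.setup()  # parse command-line/environment configuration

    def check_message(self, connector, host, secret_key, resource, parameters):
        # Decide whether the triggering dataset should be downloaded and processed
        return CheckMessage.download

    def process_message(self, connector, host, secret_key, resource, parameters):
        # Per-dataset work (clipping, uploads, metadata) goes here
        pass


if __name__ == "__main__":
    ExamplePlotExtractor().start()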
Example No. 11
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "meantemptraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        dsmd = download_metadata(connector, host, secret_key,
                                 resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                              bounds)
            #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature
            # Mask out negative (invalid) pixels, then take the mean and convert Kelvin to Celsius
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                #generate_csv(tmp_csv, fields, trait_list)
                csv_file.write(','.join(map(str, trait_list)) + '\n')

                # Prepare and submit datapoint
                centroid_lonlat = json.loads(
                    centroid_from_geojson(bounds))["coordinates"]
                time_fmt = timestamp + "T12:00:00-07:00"
                dpmetadata = {
                    "source": host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                    "surface_temperature": str(mean_tc)
                }
                create_datapoint_with_dependencies(
                    connector, host, secret_key, "IR Surface Temperature",
                    (centroid_lonlat[1], centroid_lonlat[0]), time_fmt,
                    time_fmt, dpmetadata, timestamp)

            successful_plots += 1

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed":
                successful_plots,
                "plots_skipped":
                len(all_plots) - successful_plots,
                "betydb_link":
                "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        metadata)

        self.end_message()
Example No. 12
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message(resource)

		sensor_type, timestamp = resource['name'].split(" - ")

		# First, re-check metadata to verify it hasn't been added in meantime
		ds_md = download_metadata(connector, host, secret_key, resource['id'])
		terra_md = get_terraref_metadata(ds_md)
		if terra_md:
			self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
			return

		# These datasets do not have TERRA md
		uncleanables = ["Full Field"]
		if sensor_type in uncleanables:
			self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
			return

		# For these datasets, we must get TERRA md from raw_data source
		lv1_types = {"RGB GeoTIFFs": "stereoTop",
					 "Thermal IR GeoTIFFs": "flirIrCamera"}
		if sensor_type in lv1_types:
			raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
		else:
			# Search for metadata.json source file
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
		source_dir = self.remapMountPath(connector, source_dir)

		if self.delete:
			# Delete all existing metadata from this dataset
			self.log_info(resource, "Deleting existing metadata")
			delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

		# TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
		if sensor_type == "scanner3DTop":
			source_dir = source_dir.replace("Level_1", "raw_data")

		self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
		if os.path.isdir(source_dir):
			md_file = None
			for f in os.listdir(source_dir):
				if f.endswith("metadata.json"):
					md_file = os.path.join(source_dir, f)
			if md_file:
				self.log_info(resource, "Found metadata.json; cleaning")
				md_json = clean_metadata(load_json_file(md_file), sensor_type)
				format_md = {
					"@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
								 {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
					"content": md_json,
					"agent": {
						"@type": "cat:user",
						"user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
					}
				}
				self.log_info(resource, "Uploading cleaned metadata")
				upload_metadata(connector, host, secret_key, resource['id'], format_md)

				# Now trigger a callback extraction if given
				if len(self.callback) > 0:
					self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
					submit_extraction(connector, host, secret_key, resource['id'], self.callback)
				else:
					callbacks = self.get_callbacks_by_sensor(sensor_type)
					if callbacks:
						for c in callbacks:
							self.log_info(resource, "Submitting callback extraction to %s" % c)
							submit_extraction(connector, host, secret_key, resource['id'], c)
					else:
						self.log_info(resource, "No default callback found for %s" % sensor_type)
			else:
				self.log_error(resource, "metadata.json not found in %s" % source_dir)

		else:
			self.log_error(resource, "%s could not be found" % source_dir)

		# TODO: Have extractor check for existence of Level_1 output product and delete if exists?

		self.end_message(resource)
Example No. 13
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(CanopyCover, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Initialize local variables
        dataset_name = resource["name"]
        experiment_name = "Unknown Experiment"
        datestamp = None
        citation_auth_override, citation_title_override, citation_year_override = None, None, None
        config_specie = None

        store_in_geostreams = True
        store_in_betydb = True
        create_csv_files = True
        out_geo = None
        out_csv = None

        # Find the files we're interested in
        imagefiles = self.find_image_files(resource['local_paths'], resource)
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Setup overrides and get the restore function
        restore_fn = self.setup_overrides(host, secret_key, resource)
        if not restore_fn:
            self.end_message(resource)
            return

        try:
            # Get the best timestamp
            timestamp = terraref_timestamp_to_iso(
                self.find_timestamp(resource['dataset_info']['name']))
            if 'T' in timestamp:
                datestamp = timestamp.split('T')[0]
            else:
                datestamp = timestamp
                timestamp += 'T12:00:00'
            if timestamp.find('T') > 0 and timestamp.rfind(
                    '-') > 0 and timestamp.find('T') < timestamp.rfind('-'):
                # Convert to local time. We can do this due to site definitions having
                # the time offsets as part of their definition
                localtime = timestamp[0:timestamp.rfind('-')]
            else:
                localtime = timestamp
            _, experiment_name, _ = self.get_season_and_experiment(
                timestamp_to_terraref(timestamp), self.sensor_name)

            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            file_filters = self.get_file_filters()
            if self.experiment_metadata:
                extractor_json = self.find_extractor_json()
                if extractor_json:
                    if 'citationAuthor' in extractor_json:
                        citation_auth_override = extractor_json[
                            'citationAuthor']
                    if 'citationYear' in extractor_json:
                        citation_year_override = extractor_json['citationYear']
                    if 'citationTitle' in extractor_json:
                        citation_title_override = extractor_json[
                            'citationTitle']
                    if 'noGeostreams' in extractor_json:
                        store_in_geostreams = False
                    if 'noBETYdb' in extractor_json:
                        store_in_betydb = False
                    if 'noCSV' in extractor_json:
                        create_csv_files = False

                if 'germplasmName' in self.experiment_metadata:
                    config_specie = self.experiment_metadata['germplasmName']

            # Setup for the extracted plot canopy cover
            sensor_name = "canopybyplot"

            # Create the output files
            rootdir = self.sensors.create_sensor_path(
                timestamp_to_terraref(timestamp),
                sensor=sensor_name,
                ext=".csv",
                opts=[experiment_name])
            (fields, traits) = get_traits_table()

            if create_csv_files:
                out_geo = os.path.splitext(rootdir)[0] + "_canopycover_geo.csv"
                self.log_info(resource,
                              "Writing Geostreams CSV to %s" % out_geo)
                out_csv = os.path.splitext(rootdir)[0] + "_canopycover.csv"
                self.log_info(resource,
                              "Writing Shapefile CSV to %s" % out_csv)

            # Setup default trait values
            if config_specie is not None:
                traits['species'] = config_specie
            if citation_auth_override is not None:
                traits['citation_author'] = citation_auth_override
            if citation_title_override is not None:
                traits['citation_title'] = citation_title_override
            if citation_year_override is not None:
                traits['citation_year'] = citation_year_override
            else:
                traits['citation_year'] = datestamp[:4]

            bety_csv_header = ','.join(map(str, fields))
            geo_csv_header = ','.join([
                'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
                'timestamp'
            ])

            # Loop through all the images (of which there should be one - see above)
            geo_rows = []
            bety_rows = []
            for filename in imagefiles:

                # Check if we're filtering files
                if file_filters:
                    if not file_filtered_in(filename, file_filters):
                        continue

                try:
                    cc_val = ""

                    # Load the pixels
                    clip_pix = np.array(gdal.Open(filename).ReadAsArray())

                    # Get additional, necessary data
                    centroid = imagefiles[filename]["bounds"].Centroid()
                    plot_name = _get_plot_name(
                        [resource['dataset_info']['name'], dataset_name])

                    cc_val = calculate_canopycover_masked(
                        np.rollaxis(clip_pix, 0, 3))

                    # Prepare the data for writing
                    image_clowder_id = ""
                    image_name = os.path.basename(filename)
                    if image_name in image_ids:
                        image_clowder_id = image_ids[image_name]

                    # Write the datapoint geographically and otherwise
                    csv_data = ','.join([
                        plot_name, 'Canopy Cover',
                        str(centroid.GetY()),
                        str(centroid.GetX()), localtime,
                        host.rstrip('/') + '/files/' + image_clowder_id,
                        str(cc_val), datestamp
                    ])
                    if out_geo:
                        self.write_csv_file(resource, out_geo, geo_csv_header,
                                            csv_data)
                    if store_in_geostreams:
                        geo_rows.append(csv_data)

                    traits['canopy_cover'] = str(cc_val)
                    traits['site'] = plot_name
                    traits['local_datetime'] = localtime
                    trait_list = generate_traits_list(traits)
                    csv_data = ','.join(map(str, trait_list))
                    if out_csv:
                        self.write_csv_file(resource, out_csv, bety_csv_header,
                                            csv_data)
                    if store_in_betydb:
                        bety_rows.append(csv_data)

                except Exception as ex:
                    self.log_error(
                        resource,
                        "Error generating canopy cover for %s" % plot_name)
                    self.log_error(resource, "    exception: %s" % str(ex))
                    continue

                # Only process the first file that's valid
                if num_image_files > 1:
                    self.log_info(
                        resource,
                        "Multiple image files were found, only using first found"
                    )
                    break

            # Upload any geostreams or betydb data
            if store_in_geostreams:
                if geo_rows:
                    try:
                        update_geostreams(connector, host, secret_key,
                                          geo_csv_header, geo_rows)
                    except Exception as ex:
                        self.log_error(
                            resource,
                            "Exception caught while updating geostreams: " +
                            str(ex))
                else:
                    self.log_info(
                        resource, "No geostreams data was generated to upload")

            if store_in_betydb:
                if bety_rows:
                    try:
                        update_betydb(bety_csv_header, bety_rows)
                    except Exception as ex:
                        self.log_error(
                            resource,
                            "Exception caught while updating betydb: " +
                            str(ex))
                else:
                    self.log_info(resource,
                                  "No BETYdb data was generated to upload")

            # Update this dataset with the extractor info
            dataset_id = self.get_dataset_id(host, secret_key, resource,
                                             dataset_name)
            try:
                # Tell Clowder this is completed so subsequent file updates don't daisy-chain
                self.log_info(resource, "updating dataset metadata")
                content = {
                    "comment": "Calculated plot-level canopy cover",
                    "canopy cover value": cc_val
                }
                if self.experiment_metadata:
                    content.update(
                        prepare_pipeline_metadata(self.experiment_metadata))
                extractor_md = build_metadata(host, self.extractor_info,
                                              dataset_id, content, 'dataset')
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                dataset_id,
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                dataset_id, extractor_md)

            except Exception as ex:
                self.log_error(
                    resource,
                    "Exception updating dataset metadata: " + str(ex))
        finally:
            # Signal the end of message processing and restore any overridden variables.
            # Note: early returns above must perform their own restoration.
            if restore_fn:
                restore_fn()
            self.end_message(resource)
Example 14
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example 15
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Build list of JSON files
        json_files = []
        for f in resource['files']:
            if f['filename'].endswith("_environmentlogger.json"):
                if f['filepath'].startswith("/home/clowder"):
                    json_files.append(f['filepath'].replace(
                        "/home/clowder", "/home/extractor"))
                else:
                    json_files.append(f['filepath'])
        json_files.sort()

        # Determine full output path
        timestamp = resource['name'].split(" - ")[1]
        out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
        temp_out_full = os.path.join(os.path.dirname(out_fullday_netcdf),
                                     "temp_full.nc")
        temp_out_single = temp_out_full.replace("_full.nc", "_single.nc")
        geo_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")

        if not file_exists(temp_out_full):
            for json_file in json_files:
                self.log_info(
                    resource, "converting %s to netCDF & appending" %
                    os.path.basename(json_file))
                ela.mainProgramTrigger(json_file, temp_out_single)
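                # NCO's ncrcat appends the single-file netCDF onto the accumulating
                # full-day file along the record (time) dimension.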
                cmd = "ncrcat --record_append %s %s" % (temp_out_single,
                                                        temp_out_full)
                subprocess.call([cmd], shell=True)
                os.remove(temp_out_single)

            shutil.move(temp_out_full, out_fullday_netcdf)
            self.created += 1
            self.bytes += os.path.getsize(out_fullday_netcdf)

        # Write out geostreams.csv
        if not file_exists(geo_csv):
            self.log_info(resource, "writing geostreams CSV")
            geo_file = open(geo_csv, 'w')
            geo_file.write(','.join([
                'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
                'timestamp'
            ]) + '\n')
            with Dataset(out_fullday_netcdf, "r") as ncdf:
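                # Each 'sensor*' variable in the netCDF becomes its own geostream;
                # the bulky sensor_spectrum stream is skipped below.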
                streams = set([
                    sensor_info.name
                    for sensor_info in ncdf.variables.values()
                    if sensor_info.name.startswith('sensor')
                ])
                for stream in streams:
                    if stream != "sensor_spectrum":
                        try:
                            memberlist = ncdf.get_variables_by_attributes(
                                sensor=stream)
                            for members in memberlist:
                                data_points = _produce_attr_dict(members)
                                for index in range(len(data_points)):
                                    dp_obj = data_points[index]
                                    if dp_obj["sensor"] == stream:
                                        time_format = "%Y-%m-%dT%H:%M:%S-07:00"
                                        time_point = (datetime.datetime(year=1970, month=1, day=1) + \
                                                      datetime.timedelta(days=ncdf.variables["time"][index])).strftime(time_format)

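                                        # The datapoint dict is embedded as one quoted CSV
                                        # field: JSON-encoded with internal quotes doubled.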
                                        geo_file.write(','.join([
                                            "Full Field - Environmental Logger",
                                            "(EL) %s" % stream,
                                            str(33.075576),
                                            str(-111.974304), time_point,
                                            host +
                                            ("" if host.endswith("/") else "/"
                                             ) + "datasets/" + resource['id'],
                                            '"%s"' % json.dumps(dp_obj).
                                            replace('"', '""'), timestamp
                                        ]) + '\n')

                        except Exception:
                            self.log_error(
                                resource,
                                "NetCDF attribute not found: %s" % stream)

        # Fetch dataset ID by dataset name if not provided
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            None,
            None,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        ds_files = get_file_list(connector, host, secret_key, target_dsid)
        found_full = False
        found_csv = False
        for f in ds_files:
            if f['filename'] == os.path.basename(out_fullday_netcdf):
                found_full = True
            if f['filename'] == os.path.basename(geo_csv):
                found_csv = True
        if not found_full:
            upload_to_dataset(connector, host, secret_key, target_dsid,
                              out_fullday_netcdf)
        if not found_csv:
            geoid = upload_to_dataset(connector, host, secret_key, target_dsid,
                                      geo_csv)
            self.log_info(resource,
                          "triggering geostreams extractor on %s" % geoid)
            submit_extraction(connector, host, secret_key, geoid,
                              "terra.geostreams")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"output_dataset": target_dsid}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
Example 16
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
        right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
        uploaded_file_ids = []
        right_ratio, left_ratio = 0, 0

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])
        #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")
        if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_mask_tiff)

            #if qual_md and 'left_quality_score' in qual_md:
            #left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
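            # gen_cc_enhanced returns the plant-pixel ratio and the soil-masked RGB
            # image (both None when a usable mask could not be produced).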
            left_ratio, left_rgb = gen_cc_enhanced(img_left)

            if left_ratio is not None and left_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None,
                               False, self.extractor_info, terra_md_full)
                compress_geotiff(left_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(left_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old version as well
                self.log_info(
                    resource, "a faulty version exists; deleting %s" %
                    left_rgb_mask_tiff)
                os.remove(left_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                              target_dsid, left_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_mask_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not self.leftonly:
            if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:

                right_ratio, right_rgb = gen_cc_enhanced(img_right)

                if right_ratio is not None and right_rgb is not None:
                    # Bands must be reordered to avoid swapping R and B
                    right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                    create_geotiff(right_rgb, right_bounds,
                                   right_rgb_mask_tiff, None, False,
                                   self.extractor_info, terra_md_full)
                    compress_geotiff(right_rgb_mask_tiff)
                    self.created += 1
                    self.bytes += os.path.getsize(right_rgb_mask_tiff)
                else:
                    # If the masked version was not generated, delete any old version as well
                    self.log_info(
                        resource, "a faulty version exists; deleting %s" %
                        right_rgb_mask_tiff)
                    os.remove(right_rgb_mask_tiff)

            found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                                  target_dsid,
                                                  right_rgb_mask_tiff)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_rgb_mask_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            md = {
                "files_created": uploaded_file_ids,
                "left_mask_ratio": left_ratio
            }
            if not self.leftonly:
                md["right_mask_ratio"] = right_ratio
            extractor_md = build_metadata(host, self.extractor_info,
                                          target_dsid, md, 'dataset')
            self.log_info(resource,
                          "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'],
                            self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'],
                            extractor_md)

        self.end_message(resource)
Example 17
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get BIN file and metadata
        bin_file, metadata = None, None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, metadata]:
            logging.getLogger(__name__).error(
                'could not find both the ir.bin file and its metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        png_path = self.sensors.create_sensor_path(timestamp, ext='png')
        tiff_path = self.sensors.create_sensor_path(timestamp)
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        skipped_png = False
        if not os.path.exists(png_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating %s" % png_path)
            # get raw data from bin file
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape(
                [480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            # Only upload the newly generated file to Clowder if it isn't already in dataset
            if png_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, png_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True

        if not os.path.exists(tiff_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating temperature matrix")
            gps_bounds = geojson_to_tuples(
                metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file,
                                          numpy.dtype('<u2')).reshape(
                                              [480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data,
                                                metadata)  # get temperature

            logging.getLogger(__name__).info("Creating %s" % tiff_path)
            # Rename temporary tif after creation to avoid long path errors
            out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                        resource['id'].encode('utf8'))
            create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True,
                           self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, tiff_path)
            if tiff_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, tiff_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message()
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get bin files and metadata
        metadata = None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_extractor_metadata(all_dsmd)
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_metadata.json') and f.find(
                    '/_metadata.json') == -1 and metadata is None:
                metadata = load_json_file(f)
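        # Collect the 101 binary frame files (0000.bin through 0100.bin) keyed by index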
        frames = {}
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
            for f in resource['files']:
                if f['filename'].endswith(format_ind + '.bin'):
                    frames[ind] = f['filename']
        if metadata is None or len(frames) < 101:
            logging.error('could not find all of the frames and metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.create_sensor_path(timestamp,
                                                    opts=['combined_hist'])
        coloredImg_path = self.sensors.create_sensor_path(
            timestamp, opts=['combined_pseudocolored'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            connector,
            host,
            secret_key,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[:7],
            timestamp[:10],
            leaf_ds_name=resource['dataset_info']['name'])

        img_width = 1936
        img_height = 1216
        png_frames = {}
        # skip 0101.bin since 101 is an XML file that lists the frame times
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
            png_path = self.sensors.create_sensor_path(timestamp,
                                                       opts=[format_ind])
            png_frames[ind] = png_path
            if not os.path.exists(png_path) or self.overwrite:
                logging.info("...generating and uploading %s" % png_path)
                pixels = numpy.fromfile(frames[ind],
                                        numpy.dtype('uint8')).reshape(
                                            [img_height, img_width])
                create_image(pixels, png_path)
                if png_path not in resource['local_paths']:
                    fileid = upload_to_dataset(connector, host, secret_key,
                                               target_dsid, png_path)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(png_path)

        # Generate aggregate outputs
        logging.info("...generating aggregates")
        if not (os.path.exists(hist_path)
                and os.path.exists(coloredImg_path)) or self.overwrite:
            psiiCore.psii_analysis(png_frames, hist_path, coloredImg_path)
            self.created += 2
            self.bytes += os.path.getsize(hist_path)
            self.bytes += os.path.getsize(coloredImg_path)
        if hist_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, hist_path)
            uploaded_file_ids.append(fileid)
        if coloredImg_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, coloredImg_path)
            uploaded_file_ids.append(fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message()
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                            opts=['left'])
        right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['right'])
        uploaded_file_ids = []

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
            EI = getEnhancedImage(img_left)
            create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              left_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not file_exists(right_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
            EI = getEnhancedImage(img_right)
            create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(right_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              right_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       right_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        remove_metadata(connector, host, secret_key, target_dsid,
                        self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

        self.end_message(resource)
Example 20
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, metadata = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, metadata]:
            self.log_error(
                resource,
                "could not locate each of left+right+metadata in processing")
            raise ValueError(
                "could not locate each of left+right+metadata in processing")

        # Determine output location & filenames
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image shapes & gps bounds")
        left_shape = bin2tiff.get_image_shape(metadata, 'left')
        right_shape = bin2tiff.get_image_shape(metadata, 'right')
        left_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['left']['bounding_box'])
        right_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['right']['bounding_box'])
        out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                    resource['id'].encode('utf8'))

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        if (not os.path.isfile(left_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % left_tiff)
            left_image = bin2tiff.process_image(left_shape, img_left, None)
            # Rename output.tif after creation to avoid long path errors
            create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            # TODO: we're moving zero byte files
            shutil.move(out_tmp_tiff, left_tiff)
            if left_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           left_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)

        if (not os.path.isfile(right_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % right_tiff)
            right_image = bin2tiff.process_image(right_shape, img_right, None)
            create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, right_tiff)
            if right_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        self.log_info(resource, "uploading LemnaTec metadata")
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message(resource)
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "canopycovertraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = ccCore.get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        logging.info(connector)
        logging.info(host)
        logging.info(secret_key)
        logging.info(resource)
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue
                ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5)
                ccVal *= 100.0  # Make 0-100 instead of 0-1
                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception:
                logging.error("error generating cc for %s" % plotname)
                continue

            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = ccCore.generate_traits_list(traits)

            csv_file.write(','.join(map(str, trait_list)) + '\n')

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source":
                host + ("" if host.endswith("/") else "/") + "files/" +
                resource['id'],
                "canopy_cover":
                ccVal
            }
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed":
                successful_plots,
                "plots_skipped":
                len(all_plots) - successful_plots,
                "betydb_link":
                "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
Example 22
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product if necessary
        target_md = download_metadata(connector, host, secret_key, target_dsid)
        if not get_extractor_metadata(target_md, self.extractor_info['name']):
            self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
            remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
            terra_md_trim = get_terraref_metadata(all_dsmd)
            if updated_experiment is not None:
                terra_md_trim['experiment_metadata'] = updated_experiment
            terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
            level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
            upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        try:
            left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
            gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
            right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
            gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
        except KeyError:
            self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return

        if (not file_exists(left_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % left_tiff)
            left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
            create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if (not file_exists(right_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % right_tiff)
            right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
            create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            try:
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
            except Exception:
                self.log_info(resource, "problem uploading extractor metadata...")

        self.end_message(resource)
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        # pylint: disable=global-statement
        global SENSOR_NAME
        global FIELD_NAME_LIST

        self.start_message(resource)
        super(PlotExtractor, self).process_message(connector, host, secret_key,
                                                   resource, parameters)

        # Initialize local variables
        dataset_name = resource["name"]
        experiment_name = "Unknown Experiment"
        datestamp = None
        citation_auth_override, citation_title_override, citation_year_override = None, None, None
        config_specie = None

        # Initialize the data-writing overrides. The logic is inverted because the
        # configuration flags state what should never be written.
        store_in_geostreams = not getattr(configuration, "NEVER_WRITE_GEOSTREAMS", False)
        store_in_betydb = not getattr(configuration, "NEVER_WRITE_BETYDB", False)
        create_csv_files = not getattr(configuration, "NEVER_WRITE_CSV", False)
        out_geo = None
        out_csv = None

        # Find the files we're interested in
        imagefiles = self.find_image_files(resource['local_paths'])
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Setup overrides and get the restore function
        restore_fn = self.setup_overrides(host, secret_key, resource)
        if not restore_fn:
            self.end_message(resource)
            return

        try:
            # Get the best timestamp
            timestamp = terraref_timestamp_to_iso(
                self.find_timestamp(resource['dataset_info']['name']))
            if 'T' in timestamp:
                datestamp = timestamp.split('T')[0]
            else:
                datestamp = timestamp
                timestamp += 'T12:00:00'
            if timestamp.find('T') > 0 and timestamp.rfind(
                    '-') > 0 and timestamp.find('T') < timestamp.rfind('-'):
                # Convert to local time. We can do this due to site definitions having
                # the time offsets as part of their definition
                localtime = timestamp[0:timestamp.rfind('-')]
            else:
                localtime = timestamp
            _, experiment_name, _ = self.get_season_and_experiment(
                timestamp_to_terraref(timestamp), self.sensor_name)

            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            file_filters = self.get_file_filters()
            if self.experiment_metadata:
                extractor_json = self.find_extractor_json()
                if extractor_json:
                    if 'citationAuthor' in extractor_json:
                        citation_auth_override = extractor_json[
                            'citationAuthor']
                    if 'citationYear' in extractor_json:
                        citation_year_override = extractor_json['citationYear']
                    if 'citationTitle' in extractor_json:
                        citation_title_override = extractor_json[
                            'citationTitle']
                    if 'noGeostreams' in extractor_json:
                        store_in_geostreams = False
                    if 'noBETYdb' in extractor_json:
                        store_in_betydb = False
                    if 'noCSV' in extractor_json:
                        create_csv_files = False

                if 'germplasmName' in self.experiment_metadata:
                    config_specie = self.experiment_metadata['germplasmName']

            # Create the output files
            rootdir = self.sensors.create_sensor_path(timestamp,
                                                      sensor=SENSOR_NAME,
                                                      ext=".csv",
                                                      opts=[experiment_name])
            (bety_fields, bety_traits) = get_bety_traits_table()
            (geo_fields, geo_traits) = get_geo_traits_table()

            if create_csv_files:
                out_geo = os.path.splitext(
                    rootdir)[0] + "_" + SENSOR_NAME + "_geo.csv"
                self.log_info(resource,
                              "Writing Geostreams CSV to %s" % out_geo)
                out_csv = os.path.splitext(
                    rootdir)[0] + "_" + SENSOR_NAME + ".csv"
                self.log_info(resource,
                              "Writing Shapefile CSV to %s" % out_csv)

            # Setup default trait values
            if config_specie is not None:
                bety_traits['species'] = config_specie
            if citation_auth_override is not None:
                bety_traits['citation_author'] = citation_auth_override
            if citation_title_override is not None:
                bety_traits['citation_title'] = citation_title_override
            if citation_year_override is not None:
                bety_traits['citation_year'] = citation_year_override
            else:
                bety_traits['citation_year'] = datestamp[:4]

            bety_csv_header = ','.join(map(str, bety_fields))
            geo_csv_header = ','.join(map(str, geo_fields))

            # Loop through all the images (of which there should be one - see above)
            geo_rows = []
            bety_rows = []
            len_field_value = len(FIELD_NAME_LIST)
            for filename in imagefiles:

                # Check if we're filtering files
                if file_filters:
                    if not file_filtered_in(filename, file_filters):
                        continue

                try:
                    calc_value = ""

                    # Load the pixels
                    clip_pix = np.array(gdal.Open(filename).ReadAsArray())

                    # Get additional, necessary data
                    centroid = imagefiles[filename]["bounds"].Centroid()
                    plot_name = _get_plot_name(
                        [resource['dataset_info']['name'], dataset_name])

                    calc_value = calculate(np.rollaxis(clip_pix, 0, 3))

                    # Convert to something iterable that's in the correct order
                    if isinstance(calc_value, set):
                        raise RuntimeError("A 'set' type of data was returned and isn't supported. " \
                                           "Please use a list or a tuple instead")
                    elif isinstance(calc_value, dict):
                        # Assume the dictionary is going to have field names with their values
                        # We check whether we have the correct number of fields later. This also
                        # filters out extra fields
                        values = []
                        for key in FIELD_NAME_LIST:
                            if key in calc_value:
                                values.append(calc_value[key])
                    elif not isinstance(calc_value, (list, tuple)):
                        values = [calc_value]
                    else:
                        values = list(calc_value)

                    # Sanity check our values
                    len_calc_value = len(values)
                    if len_calc_value != len_field_value:
                        raise RuntimeError(
                            "Incorrect number of values returned. Expected " +
                            str(len_field_value) + " and received " +
                            str(len_calc_value))

                    # Prepare the data for writing
                    image_clowder_id = ""
                    image_name = os.path.basename(filename)
                    if image_name in image_ids:
                        image_clowder_id = image_ids[image_name]
                    geo_traits['site'] = plot_name
                    geo_traits['lat'] = str(centroid.GetY())
                    geo_traits['lon'] = str(centroid.GetX())
                    geo_traits['dp_time'] = localtime
                    geo_traits['source'] = host.rstrip('/') + '/files/' + str(
                        image_clowder_id)
                    geo_traits['timestamp'] = datestamp

                    # Write the data points geographically and otherwise
                    for idx in range(0, len_field_value):
                        # The way the code is configured, Geostreams can only handle one field
                        # at a time so we write out one row per field/value pair
                        geo_traits['trait'] = FIELD_NAME_LIST[idx]
                        geo_traits['value'] = str(values[idx])
                        trait_list = generate_traits_list(
                            geo_fields, geo_traits)
                        csv_data = ','.join(map(str, trait_list))
                        if out_geo:
                            self.write_csv_file(resource, out_geo,
                                                geo_csv_header, csv_data)
                        if store_in_geostreams:
                            geo_rows.append(csv_data)

                        # BETYdb can handle wide rows with multiple values so we just set the field
                        # values here and write the single row after the loop
                        bety_traits[FIELD_NAME_LIST[idx]] = str(values[idx])

                    bety_traits['site'] = plot_name
                    bety_traits['local_datetime'] = localtime
                    trait_list = generate_traits_list(bety_fields, bety_traits)
                    csv_data = ','.join(map(str, trait_list))
                    if out_csv:
                        self.write_csv_file(resource, out_csv, bety_csv_header,
                                            csv_data)
                    if store_in_betydb:
                        bety_rows.append(csv_data)

                except Exception as ex:
                    self.log_error(
                        resource, "error generating " + EXTRACTOR_NAME +
                        " for %s" % plot_name)
                    self.log_error(resource, "    exception: %s" % str(ex))
                    continue

                # Only process the first file that's valid
                if num_image_files > 1:
                    self.log_info(
                        resource,
                        "Multiple image files were found, only using first found"
                    )
                    break

            # Upload any geostreams or betydb data
            if store_in_geostreams:
                if geo_rows:
                    update_geostreams(connector, host, secret_key,
                                      geo_csv_header, geo_rows)
                else:
                    self.log_info(
                        resource, "No geostreams data was generated to upload")

            if store_in_betydb:
                if bety_rows:
                    update_betydb(bety_csv_header, bety_rows)
                else:
                    self.log_info(resource,
                                  "No BETYdb data was generated to upload")

            # Update this dataset with the extractor info
            dataset_id = self.get_dataset_id(host, secret_key, resource,
                                             dataset_name)
            try:
                # Tell Clowder this is completed so subsequent file updates don't daisy-chain
                self.log_info(resource, "updating dataset metadata")
                content = {
                    "comment": "Calculated " + SENSOR_NAME + " index",
                    SENSOR_NAME + " value": calc_value
                }
                if self.experiment_metadata:
                    content.update(
                        prepare_pipeline_metadata(self.experiment_metadata))
                extractor_md = build_metadata(host, self.extractor_info,
                                              dataset_id, content, 'dataset')
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                dataset_id,
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                dataset_id, extractor_md)

            except Exception as ex:
                self.log_error(
                    resource,
                    "Exception updating dataset metadata: " + str(ex))
        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if restore_fn:
                restore_fn()
            self.end_message(resource)
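
For reference, the loop above expects calculate() to return either a dict keyed by the entries of FIELD_NAME_LIST, or a list/tuple with one value per field, computed from the clipped pixel array. A minimal sketch of such a plugin under those assumptions (the single "ndvi" field and the band order are illustrative, not part of the extractor above):

import numpy as np

# Hypothetical single-field configuration; the real FIELD_NAME_LIST is defined by the plugin.
FIELD_NAME_LIST = ["ndvi"]

def calculate(pxarray):
    """Return one value per FIELD_NAME_LIST entry for an HxWxC pixel array."""
    red = pxarray[:, :, 0].astype(float)
    nir = pxarray[:, :, 1].astype(float)
    ndvi = (nir - red) / (nir + red + 1e-9)
    return {"ndvi": float(np.nanmean(ndvi))}
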
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message(resource)

		# TODO: Get this from Clowder fixed metadata
		geom = {
			"type": "Point",
			"coordinates": [-111.974304, 33.075576, 361]
		}
		disp_name = self.sensors.get_display_name()

		# Get sensor or create if not found
		sensor_data = get_sensor_by_name(connector, host, secret_key, disp_name)
		if not sensor_data:
			sensor_id = create_sensor(connector, host, secret_key, disp_name, geom, {
				"id": "MAC Met Station",
				"title": "MAC Met Station",
				"sensorType": 4
			}, "Maricopa")
		else:
			sensor_id = sensor_data['id']

		# Get stream or create if not found
		stream_name = "Weather Observations (5 min bins)"
		stream_data = get_stream_by_name(connector, host, secret_key, stream_name)
		if not stream_data:
			stream_id = create_stream(connector, host, secret_key, stream_name, sensor_id, geom)
		else:
			stream_id = stream_data['id']

		# Process each file and concatenate results together.
		datasetUrl = urlparse.urljoin(host, 'datasets/%s' % resource['id'])
		ISO_8601_UTC_OFFSET = dateutil.tz.tzoffset("-07:00", -7 * 60 * 60)
		#! Files should be sorted for the aggregation to work.
		aggregationState = None
		lastAggregatedFile = None
		target_files = get_all_files(resource)
		datapoint_count = 0
		# To work with the aggregation process, add an extra NULL file to indicate we are done with all the files.
		for file in (list(target_files) + [ None ]):
			if file == None:
				# We are done with all the files, pass None to let aggregation wrap up any work left.
				records = None
				fileId = lastAggregatedFile['id']
			else:
				# Add this file to the aggregation.
				for p in resource['local_paths']:
					if os.path.basename(p) == file['filename']:
						filepath = p
				# Parse one file and get all the records in it.
				records = parse_file(filepath, utc_offset=ISO_8601_UTC_OFFSET)
				fileId = file['id']

			aggregationResult = aggregate(
					cutoffSize=self.agg_cutoff,
					tz=ISO_8601_UTC_OFFSET,
					inputData=records,
					state=aggregationState
			)
			aggregationState = aggregationResult['state']
			aggregationRecords = aggregationResult['packages']

			# Add props to each record.
			datapoint_list = []
			for record in aggregationRecords:
				record['properties']['source'] = datasetUrl
				record['properties']['source_file'] = fileId
				cleaned_properties = {}
				# Check for nan values from the stream
				for prop in record['properties']:
					val = record['properties'][prop]
					if not (type(val) == float and math.isnan(val)):
						cleaned_properties[prop] = val
				datapoint_list.append({
					"start_time": record['start_time'],
					"end_time": record['end_time'],
					"type": "Point",
					"geometry": record['geometry'],
					"properties": cleaned_properties
				})
				if len(datapoint_list) > self.batchsize:
					create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
					datapoint_count += len(datapoint_list)
					datapoint_list = []
			if len(datapoint_list) > 0:
				create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
				datapoint_count += len(datapoint_list)

			lastAggregatedFile = file

		# Mark dataset as processed
		metadata = build_metadata(host, self.extractor_info, resource['id'], {
			"datapoints_created": datapoint_count}, 'dataset')
		upload_metadata(connector, host, secret_key, resource['id'], metadata)

		self.end_message(resource)
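
The aggregation loop above assumes each finished package returned by aggregate() carries start/end times, a geometry, and a properties dict that may contain NaN gaps (which are stripped before upload). An illustrative, assumed record shape matching what gets passed to create_datapoints():

# Hypothetical aggregated record; actual property names come from the weather file parser.
example_record = {
    "start_time": "2017-06-01T12:00:00-07:00",
    "end_time": "2017-06-01T12:05:00-07:00",
    "geometry": {"type": "Point", "coordinates": [-111.974304, 33.075576, 361]},
    "properties": {"air_temperature": 301.2, "relative_humidity": 28.4}
}
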
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        terra_md = resource['metadata']
        ds_info = get_info(connector, host, secret_key, resource['id'])

        # @begin extract_positional_info_from_metadata
        # @in new_dataset_added
        # @out gantry_geometry
        # @end extract_positional_info

        # Get sensor from datasetname
        self.log_info(resource, "Getting position information from metadata")
        (streamprefix, timestamp) = ds_info['name'].split(' - ')
        date = timestamp.split("__")[0]
        scan_time = calculate_scan_time(terra_md)
        streamprefix += " Datasets"
        dpmetadata = {
            "source_dataset": host + ("" if host.endswith("/") else "/") +
                              "datasets/" + resource['id'],
            "dataset_name": ds_info['name']
        }

        centroid = None
        bbox = None
        for entry in terra_md['spatial_metadata']:
            if 'centroid' in terra_md['spatial_metadata'][entry]:
                centroid = terra_md['spatial_metadata'][entry]['centroid']
            if 'bounding_box' in terra_md['spatial_metadata'][entry]:
                bbox = terra_md['spatial_metadata'][entry]['bounding_box']
                bbox = {
                    "type": bbox['type'],
                    "coordinates": [bbox['coordinates']]
                }

        if 'site_metadata' in terra_md:
            # We've already determined the plot associated with this dataset so we can skip some work
            self.log_info(
                resource,
                "Creating datapoint without lookup in %s" % streamprefix)
            create_datapoint_with_dependencies(
                connector, host, secret_key, streamprefix, centroid, scan_time,
                scan_time, dpmetadata, date, bbox,
                terra_md['site_metadata']['sitename'])

        else:
            # We need to do the traditional querying for plot
            self.log_info(
                resource,
                "Creating datapoint with lookup in %s" % streamprefix)
            create_datapoint_with_dependencies(connector, host, secret_key,
                                               streamprefix, centroid,
                                               scan_time, scan_time,
                                               dpmetadata, date, bbox)

        # Attach geometry to Clowder metadata as well
        self.log_info(resource, "Uploading dataset metadata")
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"datapoints_added": 1}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
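
The loop over terra_md['spatial_metadata'] above only needs a 'centroid' and/or 'bounding_box' under some sensor entry. An assumed example of that structure (the sensor key and coordinates are illustrative):

# Hypothetical spatial metadata; note the bounding box ring is wrapped in an extra
# list by the code above before being stored in bbox.
spatial_metadata_example = {
    "flirIrCamera": {
        "centroid": {"type": "Point", "coordinates": [-111.9749, 33.0764]},
        "bounding_box": {
            "type": "Polygon",
            "coordinates": [[-111.9750, 33.0763], [-111.9748, 33.0763],
                            [-111.9748, 33.0765], [-111.9750, 33.0765]]
        }
    }
}
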
Ejemplo n.º 26
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue

                plot_img = create_image(pxarray, "plot_image.png")
                plot_csv = "plot.csv"
                self.generate_table_only(plot_img, plot_csv)
                trait_vals = self.extract_vals_from_csv(plot_csv)

                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception as ex:
                logging.error("error generating traits for %s: %s" % (plotname, str(ex)))
                continue

            # Create BETY-ready CSV
            (fields, traits) = self.get_traits_table()
            for tr in trait_vals:
                traits[tr] = str(trait_vals[tr])
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12-00-00-000"
            trait_list = self.generate_traits_list(traits)
            self.generate_cc_csv(plot_csv, fields, trait_list)

            # submit CSV to BETY
            submit_traits(plot_csv, self.bety_key)

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + "files/" + resource['id'],
            }
            for tr in trait_vals:
                dpmetadata[tr] = str(trait_vals[tr])
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

            os.remove(plot_img)
            os.remove(plot_csv)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host,
            self.extractor_info,
            resource['parent']['id'],
            {
                "plots_processed": successful_plots,
                "plots_skipped": len(all_plots) - successful_plots
                # TODO: add link to BETY trait IDs
            },
            'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
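
The (fields, traits) pair used above fixes the BETY CSV column order and holds per-plot values. A rough sketch of that contract, assuming a single canopy_cover trait (the real table in the extractor may carry more columns):

def get_traits_table():
    # Column order for the BETY CSV; values are re-filled for each plot.
    fields = ("local_datetime", "canopy_cover", "site",
              "citation_author", "citation_year", "citation_title", "method")
    traits = {name: "" for name in fields}
    return fields, traits

def generate_traits_list(traits):
    fields, _ = get_traits_table()
    return [traits[name] for name in fields]
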
Ejemplo n.º 27
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        spatial_meta = None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
                else:
                    spatial_meta = None
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            filename = os.path.basename(f)
            if filename.startswith("ir_geotiff") and filename.endswith(".tif"):
                sensor_name = "ir_geotiff"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif filename.startswith("rgb_geotiff") and filename.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                if filename.endswith("_left.tif"): side = "left"
                else:                              side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this file has already been merged
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
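
find_plots_intersect_boundingbox() above is expected to map plot names to GeoJSON boundary strings, which are then parsed with yaml.safe_load() and converted to clip tuples. An assumed example of that mapping (plot name and coordinates are illustrative):

# Hypothetical return value; one GeoJSON string per overlapping plot.
overlap_plots_example = {
    "MAC Field Scanner Season 4 Range 10 Column 5":
        '{"type": "Polygon", "coordinates": [[[-111.9751, 33.0745], [-111.9748, 33.0745],'
        ' [-111.9748, 33.0747], [-111.9751, 33.0747], [-111.9751, 33.0745]]]}'
}
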
Ejemplo n.º 28
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']
        left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image quality")
        left_qual = getImageQuality(img_left)
        if not self.leftonly:
            right_qual = getImageQuality(img_right)

        left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_nrmac_tiff)
            create_geotiff(np.array([[left_qual, left_qual],[left_qual, left_qual]]), left_bounds,
                           left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % left_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                       left_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if not self.leftonly:
            if (not file_exists(right_nrmac_tiff) or self.overwrite):
                self.log_info(resource, "creating %s" % right_nrmac_tiff)
                create_geotiff(np.array([[right_qual, right_qual],[right_qual, right_qual]]), right_bounds,
                               right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
                self.created += 1
                self.bytes += os.path.getsize(right_nrmac_tiff)
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                self.log_info(resource, "uploading %s" % right_nrmac_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                           right_nrmac_tiff)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        md = {
            "files_created": uploaded_file_ids,
            "left_quality_score": left_qual
        }
        if not self.leftonly:
            md["right_quality_score"] = right_qual
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'file')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
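
The extractor above encodes a single quality score as a tiny 2x2 geoTIFF so it can be georeferenced and later mosaicked like any other layer. A minimal sketch of reading that score back (the file path is hypothetical):

from osgeo import gdal

# Any pixel holds the same quality score, so read one value from the 2x2 raster.
ds = gdal.Open("rgb_geotiff_quality_left.tif")
quality_score = ds.GetRasterBand(1).ReadAsArray()[0][0]
print("left image quality: %s" % quality_score)
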