def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

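        # Dataset names follow the "<sensor display name> - <timestamp>" convention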
        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product if necessary
        target_md = download_metadata(connector, host, secret_key, target_dsid)
        if not get_extractor_metadata(target_md, self.extractor_info['name']):
            self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
            remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
            terra_md_trim = get_terraref_metadata(all_dsmd)
            if updated_experiment is not None:
                terra_md_trim['experiment_metadata'] = updated_experiment
            terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
            level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
            upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        try:
            left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
            gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
            right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
            gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
        except KeyError:
            self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return

        if (not file_exists(left_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % left_tiff)
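            # Convert the raw stereo .bin data into an RGB image array using the shape from the metadata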
            left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
            create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if (not file_exists(right_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % right_tiff)
            right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
            create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            try:
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
            except Exception:
                self.log_info(resource, "problem uploading extractor metadata...")

        self.end_message(resource)
Example #2
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        terra_md_full, spatial_meta = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                sensor_name = "ir_geotiff"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                filename = os.path.basename(f)
                if f.endswith("_left.tif"): side = "left"
                else:                       side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

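        # Fetch all plot boundaries for this date (the date portion of the timestamp precedes "__")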
        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
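                    # Plot boundaries are GeoJSON; convert to a bounding-box tuple for clipping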
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this file has already been merged into the output
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example #3
    def process_message(self, connector, host, secret_key, resource, parameters):

        super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key,
                                                             resource, parameters)

        self.start_message(resource)

        # Get left/right files and metadata
        process_files = []
        if self.get_terraref_metadata is not None:
            process_files = find_terraref_files(resource)
        else:
            process_files = find_image_files(self.args.identify_binary, resource,
                                             self.file_infodata_file_ending)

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
                                    self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
                                                           self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        # Prepare for processing files
        timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
        target_dsid = resource['id']
        uploaded_file_ids = []
        ratios = []

        try:
            for one_file in process_files:

                mask_source = one_file

                # Make sure the source image is in the correct EPSG space
                epsg = get_epsg(one_file)
                if epsg != self.default_epsg:
                    self.log_info(resource, "Reprojecting from " + str(epsg) +
                                  " to default " + str(self.default_epsg))
                    _, tmp_name = tempfile.mkstemp()
                    src = gdal.Open(one_file)
                    gdal.Warp(tmp_name, src, dstSRS='EPSG:'+str(self.default_epsg))
                    mask_source = tmp_name

                # Get the bounds of the image to see if we can process it. Also get the mask filename
                rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

                if bounds is None:
                    self.log_skip(resource, "Skipping non-georeferenced image: " + \
                                                                    os.path.basename(one_file))
                    if mask_source != one_file:
                        os.remove(mask_source)
                    continue

                if not file_exists(rgb_mask_tif) or self.overwrite:
                    self.log_info(resource, "creating %s" % rgb_mask_tif)

                    mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                    ratios.append(mask_ratio)

                    # Bands must be reordered to avoid swapping R and B
                    mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)

                    create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info,
                                   self.get_terraref_metadata)
                    compress_geotiff(rgb_mask_tif)

                    # Remove any temporary file
                    if mask_source != one_file:
                        os.remove(mask_source)

                    self.created += 1
                    self.bytes += os.path.getsize(rgb_mask_tif)

                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      rgb_mask_tif, remove=self.overwrite)
                if not found_in_dest:
                    self.log_info(resource, "uploading %s" % rgb_mask_tif)
                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, rgb_mask_tif)
                    uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                             "files/" + fileid)

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            if self.get_terraref_metadata is not None:
                ratios_len = len(ratios)
                left_ratio = (ratios[0] if ratios_len > 0 else None)
                right_ratio = (ratios[1] if ratios_len > 1 else None)
                md = {
                    "files_created": uploaded_file_ids
                }
                if left_ratio is not None:
                    md["left_mask_ratio"] = left_ratio
                if not self.leftonly and right_ratio is not None:
                    md["right_mask_ratio"] = right_ratio
                extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
                self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
                remove_metadata(connector, host, secret_key, resource['id'],
                                self.extractor_info['name'])
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
Example #4
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # rulechecker provided some key information for us in parameters
        if type(parameters) is str:
            parameters = json.loads(parameters)
        if 'parameters' in parameters:
            parameters = parameters['parameters']
        if type(parameters) is unicode:
            parameters = json.loads(str(parameters))
        dataset_name = parameters["output_dataset"]
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

        timestamp = dataset_name.split(" - ")[1]

        # Input path will suggest which sensor we are seeing
        sensor_name, sensor_lookup = None, None
        for f in resource['files']:
            if f['filepath'].find("rgb_geotiff") > -1:
                sensor_name = "stereoTop"
                sensor_lookup = "rgb_fullfield"
            elif f['filepath'].find("ir_geotiff") > -1:
                sensor_name = "flirIrCamera"
                sensor_lookup = "ir_fullfield"
            elif f['filepath'].find("laser3d_heightmap") > -1:
                sensor_name = "scanner3DTop"
                sensor_lookup = "laser3d_fullfield"
            if sensor_lookup is not None:
                break

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_name, {})
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output file paths
        out_tif_full = self.sensors.create_sensor_path(
            timestamp, sensor=sensor_lookup,
            opts=[scan_name]).replace(" ", "_")
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
        out_png = out_tif_full.replace(".tif", ".png")
        out_vrt = out_tif_full.replace(".tif", ".vrt")
        out_dir = os.path.dirname(out_vrt)

        # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

        # If outputs already exist, we don't need to do anything else
        found_all = True
        if self.thumb:
            output_files = [out_vrt, out_tif_thumb]
        else:
            output_files = [out_tif_full, out_tif_medium, out_png]
        for output_file in output_files:
            if not file_exists(output_file):
                found_all = False
                break
        if found_all and not self.overwrite:
            if self.thumb:
                self.log_info(
                    resource,
                    "thumb output already exists; triggering terra.geotiff.fieldmosaic_full"
                )
                r = requests.post(
                    "%sapi/%s/%s/extractions?key=%s" %
                    (host, 'datasets', resource['id'], secret_key),
                    headers={"Content-Type": "application/json"},
                    data=json.dumps({
                        "extractor": 'terra.geotiff.fieldmosaic_full',
                        "parameters": parameters
                    }))
                r.raise_for_status()
            else:
                self.log_skip(resource, "all outputs already exist")
            return

        # Perform actual field stitching
        if not self.darker or sensor_lookup != 'rgb_fullfield':
            (nu_created, nu_bytes) = self.generateSingleMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        else:
            (nu_created, nu_bytes) = self.generateDarkerMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        self.created += nu_created
        self.bytes += nu_bytes

        if not self.thumb and os.path.isfile(out_tif_medium):
            # Create PNG thumbnail
            self.log_info(resource, "Converting 10pct to %s..." % out_png)
            cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
            subprocess.call(cmd, shell=True)
            self.created += 1
            self.bytes += os.path.getsize(out_png)

        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s" %
            (season_name, experiment_name,
             self.sensors.get_display_name(sensor=sensor_lookup),
             timestamp[:4], timestamp[5:7]))

        # Get dataset ID or create it, creating parent collections as needed
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            self.sensors.get_display_name(sensor=sensor_lookup),
            timestamp[:4],
            timestamp[5:7],
            leaf_ds_name=dataset_name)

        # Upload full field image to Clowder
        content = {
            "comment":
            "This stitched image is computed based on an assumption that the scene is planar. \
                There are likely to be small offsets near the boundary of two images anytime there are plants \
                at the boundary (because those plants are higher than the ground plane), or where the dirt is \
                slightly higher or lower than average.",
            "file_ids": parameters["file_paths"]
        }

        # If we newly created these files, upload to Clowder
        if self.thumb:
            generated_files = [out_tif_thumb]
        else:
            generated_files = [out_tif_medium, out_tif_full, out_png]
        for checked_file in generated_files:
            if os.path.isfile(checked_file):
                found_in_dest = check_file_in_dataset(connector, host,
                                                      secret_key, target_dsid,
                                                      checked_file)
                #, replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
                if not found_in_dest:
                    id = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           checked_file)
                    meta = build_metadata(host, self.extractor_info, id,
                                          content, 'file')
                    upload_metadata(connector, host, secret_key, id, meta)

                    if checked_file == out_tif_full:
                        # Trigger downstream extractions on full resolution
                        if sensor_lookup == 'ir_fullfield':
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.multispectral.meantemp")
                        elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith(
                                "_mask.tif"):
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.stereo-rgb.canopycover")

        if self.thumb:
            # TODO: Add parameters support to pyclowder submit_extraction()
            self.log_info(resource,
                          "triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" %
                              (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({
                                  "extractor":
                                  'terra.geotiff.fieldmosaic_full',
                                  "parameters": parameters
                              }))
            r.raise_for_status()

        self.end_message(resource)
Example #5
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
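            # The FLIR .bin file holds a single 640x480 frame of little-endian 16-bit values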
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example #6
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context(
        )

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # If a plot-name column was specified, make sure it exists in
                # the shapefile
                column_names = shape_table.field_names
                if plot_name_idx is not None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            "Shapefile data does not have specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                            (one_name.find('name') >= 0)
                                or (one_name.find('id') >= 0)):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Check for geographic overlap and skip if there is none
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate system before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and determine overwrite
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if (file_exists(out_file) and not self.overwrite):
                        # The file exists and we don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment":
                            "Clipped from shapefile " +
                            os.path.basename(shapefile),
                            "imageName":
                            image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
Example #7
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
        right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
        uploaded_file_ids = []
        right_ratio, left_ratio = 0, 0

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])
        #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")
        if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_mask_tiff)

            #if qual_md and 'left_quality_score' in qual_md:
            #left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
            left_ratio, left_rgb = gen_cc_enhanced(img_left)

            if left_ratio is not None and left_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None,
                               False, self.extractor_info, terra_md_full)
                compress_geotiff(left_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(left_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old faulty version as well
                if file_exists(left_rgb_mask_tiff):
                    self.log_info(
                        resource, "a faulty version exists; deleting %s" %
                        left_rgb_mask_tiff)
                    os.remove(left_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                              target_dsid, left_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_mask_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not self.leftonly:
            if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:

                right_ratio, right_rgb = gen_cc_enhanced(img_right)

                if right_ratio is not None and right_rgb is not None:
                    # Bands must be reordered to avoid swapping R and B
                    right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                    create_geotiff(right_rgb, right_bounds,
                                   right_rgb_mask_tiff, None, False,
                                   self.extractor_info, terra_md_full)
                    compress_geotiff(right_rgb_mask_tiff)
                    self.created += 1
                    self.bytes += os.path.getsize(right_rgb_mask_tiff)
                else:
                    # If the masked version was not generated, delete any old faulty version as well
                    if file_exists(right_rgb_mask_tiff):
                        self.log_info(
                            resource, "a faulty version exists; deleting %s" %
                            right_rgb_mask_tiff)
                        os.remove(right_rgb_mask_tiff)

            found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                                  target_dsid,
                                                  right_rgb_mask_tiff)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_rgb_mask_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            md = {
                "files_created": uploaded_file_ids,
                "left_mask_ratio": left_ratio
            }
            if not self.leftonly:
                md["right_mask_ratio"] = right_ratio
            extractor_md = build_metadata(host, self.extractor_info,
                                          target_dsid, md, 'dataset')
            self.log_info(resource,
                          "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'],
                            self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'],
                            extractor_md)

        self.end_message(resource)
Example #8
    def perform_uploads(self, connector, host, secret_key, resource,
                        default_dsid, content, season_name, experiment_name,
                        timestamp):
        """Perform the uploading of all the files we're put onto the upload list

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): the resource associated with the triggering request
            default_dsid(str): the default dataset to load files to
            content(str): content information for the files we're uploading
            season_name(str): the name of the season
            experiment_name(str): the name of the experiment
            timestamp(str): the timestamp string associated with the source dataset

        Notes:
            We loop through the files, compressing them and remapping the names as needed.
            If a file has no associated sensor, it is uploaded to the default dataset.
            Otherwise, the file is uploaded to the dataset associated with its sensor,
            creating that dataset first if necessary.
        """
        for one_file in self.files_to_upload:
            sourcefile = os.path.join(one_file["source_path"],
                                      one_file["source_name"])

            # Make sure we have the original file, then compress or rename it as needed
            if os.path.isfile(sourcefile):
                # make sure we have the full destination path
                if not os.path.exists(one_file["dest_path"]):
                    os.makedirs(one_file["dest_path"])

                resultfile = os.path.join(one_file["dest_path"],
                                          one_file["dest_name"])
                if one_file["compress"]:
                    resultfile = resultfile + ".zip"
                    with open(sourcefile, 'rb') as f_in:
                        with gzip.open(resultfile, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                elif not sourcefile == resultfile:
                    shutil.move(sourcefile, resultfile)

                # Find or create the target dataset for this entry
                cur_dataset_id = default_dsid
                if "sensor" in one_file:
                    sensor_type = one_file["sensor"]
                    if sensor_type in self.sensor_dsid_map:
                        cur_dataset_id = self.sensor_dsid_map[sensor_type]
                    else:
                        new_sensor = Sensors(base=self.sensors.base,
                                             station=self.sensors.station,
                                             sensor=sensor_type)

                        sensor_leaf_name = new_sensor.get_display_name(
                        ) + ' - ' + timestamp
                        ds_exists = get_datasetid_by_name(
                            host, secret_key, sensor_leaf_name)
                        new_dsid = build_dataset_hierarchy_crawl(
                            host,
                            secret_key,
                            self.clowder_user,
                            self.clowder_pass,
                            self.clowderspace,
                            season_name,
                            experiment_name,
                            new_sensor.get_display_name(),
                            timestamp[:4],
                            timestamp[5:7],
                            timestamp[8:10],
                            leaf_ds_name=sensor_leaf_name)

                        if (self.overwrite_ok
                                or not ds_exists) and self.experiment_metadata:
                            self.update_dataset_extractor_metadata(
                                connector, host, secret_key, new_dsid,
                                prepare_pipeline_metadata(
                                    self.experiment_metadata),
                                self.extractor_info['name'])

                        self.sensor_dsid_map[sensor_type] = new_dsid
                        cur_dataset_id = new_dsid

                # Check if file already exists in the dataset
                file_in_dataset = check_file_in_dataset(connector,
                                                        host,
                                                        secret_key,
                                                        cur_dataset_id,
                                                        resultfile,
                                                        remove=False)

                # If the file is already in the dataset, determine whether we need to delete it first
                if self.overwrite_ok and file_in_dataset:
                    # Delete the file from the dataset before uploading the new copy
                    self.log_info(
                        resource,
                        "Removing existing file in dataset " + resultfile)
                    check_file_in_dataset(connector,
                                          host,
                                          secret_key,
                                          cur_dataset_id,
                                          resultfile,
                                          remove=True)
                elif not self.overwrite_ok and file_in_dataset:
                    # We won't overwrite an existing file
                    self.log_skip(
                        resource, "Not overwriting existing file in dataset " +
                        resultfile)
                    continue

                # Upload the file to the dataset
                fid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, cur_dataset_id,
                                        resultfile)

                # Generate our metadata
                meta = build_metadata(host, self.extractor_info, fid, content,
                                      'file')

                # Upload the metadata for the uploaded file
                upload_metadata(connector, host, secret_key, fid, meta)

                self.created += 1
                self.bytes += os.path.getsize(resultfile)
            else:
                raise Exception("%s was not found" % sourcefile)
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                            opts=['left'])
        right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['right'])
        uploaded_file_ids = []

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
            enhanced_left = getEnhancedImage(img_left)
            create_geotiff(enhanced_left, left_bounds, left_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              left_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not file_exists(right_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
            enhanced_right = getEnhancedImage(img_right)
            create_geotiff(enhanced_right, right_bounds, right_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(right_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              right_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       right_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        remove_metadata(connector, host, secret_key, target_dsid,
                        self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

        self.end_message(resource)
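
Each of these extractors repeats the same per-side pattern: regenerate the output file when it is missing or overwrite is requested, then upload it only if the destination dataset does not already contain it. The generic sketch below captures that flow with the side-specific pieces passed in as callables; all names are hypothetical and it is not part of the original extractors.

import os

def ensure_product(path, make_file, already_in_dataset, upload, overwrite=False):
    """Create path via make_file() when needed, then upload it unless the dataset already has it."""
    created_bytes = 0
    if overwrite or not os.path.isfile(path):
        make_file(path)
        created_bytes = os.path.getsize(path)
    file_url = None
    if not already_in_dataset(path):
        file_url = upload(path)
    return created_bytes, file_url
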
Example #10
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # clean tmp directory from any potential failed previous runs
        flist = os.listdir("/tmp")
        for f in flist:
            try:
                os.remove(os.path.join("/tmp", f))
            except OSError:
                pass
        """ if file is above configured limit, skip it
		max_gb = 24 # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
		for fname in resource['local_paths']:
			if fname.endswith('raw'): rawfile = fname
		rawsize = os.stat(rawfile).st_size
		if rawsize > max_gb * 1000000000:
			self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
			return False
		"""

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_rawname = 'SWIR'
            sensor_fullname = 'swir_netcdf'
            soil_mask = None
        else:
            sensor_rawname = 'VNIR'
            sensor_fullname = 'vnir_netcdf'
            # Check for corresponding soil mask to include in workflow.sh if available
            soil_mask = self.sensors.get_sensor_path(timestamp,
                                                     sensor='vnir_soil_masks',
                                                     opts=['soil_mask'])
        out_nc = self.sensors.create_sensor_path(timestamp,
                                                 sensor=sensor_fullname)
        xps_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['xps'])
        ind_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['ind'])
        csv_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname.replace(
                                                    "_netcdf", "_traits"))

        raw_file, terra_md_full = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
            elif fname.endswith('raw'):
                raw_file = fname
        if None in [raw_file, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_rawname, terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        print_name = self.sensors.get_display_name(sensor=sensor_fullname)
        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
            (season_name, experiment_name, print_name, timestamp[:4],
             timestamp[5:7], timestamp[8:10], timestamp))
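        # Create (or find) the nested Clowder dataset hierarchy
        # season / experiment / sensor display name / year / month / day,
        # with a leaf dataset named "<display name> - <timestamp>"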
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            print_name,
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        uploaded_file_ids = []

        # Perform actual processing
        if (not file_exists(out_nc)) or self.overwrite:
            """TODO: OLD AND NOT USED
			self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
			if soil_mask and file_exists(soil_mask):
				# If soil mask exists, we can generate an _ind indices file
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
										  "-m", soil_mask, "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			else:
				# Otherwise we cannot, and need to trigger soilmask extractor and circle back later
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
											  "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			if returncode != 0:
				raise ValueError('script encountered an error')
			"""

            self.log_info(resource,
                          'invoking python calibration to create: %s' % out_nc)
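            # Build an empty netCDF shell from the raw hyperspectral capture,
            # then populate it by applying the calibration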
            create_empty_netCDF(raw_file, out_nc)
            self.log_info(resource, 'applying calibration to: %s' % out_nc)
            apply_calibration(raw_file, out_nc)
            self.log_info(resource, '...done processing %s' % raw_file)

            found_in_dest = check_file_in_dataset(connector,
                                                  host,
                                                  secret_key,
                                                  target_dsid,
                                                  out_nc,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, out_nc)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(out_nc)

            # TODO: Still compatible?
            #if not soil_mask:
            #	self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
            #	submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

            # TODO: Send output to BETYdb
            """
			# Send indices to betyDB
			if file_exists(ind_file):
				# TODO: Use ncks to trim ind_file to plots before this step
				plot_no = 'Full Field'
	
				with Dataset(ind_file, "r") as netCDF_handle:
					ndvi = netCDF_handle.get_variables_by_attributes(standard_name='normalized_difference_chlorophyll_index_750_705')
					NDVI705 = ndvi[0].getValue().ravel()[0]
	
					# TODO: Map the remaining ~50 variables in BETY to create indices file
					# TODO: In netCDF header,
	
					csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
								 'citation_author,citation_year,citation_title,method'
					csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
							   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
									timestamp, NDVI705, plot_no)
					with open(csv_file, 'w') as c:
						c.write(csv_header+'\n'+csv_vals)
	
				# TODO: Send this CSV to betydb & geostreams extractors instead
				submit_traits(csv_file, bety_key=self.bety_key)
			"""

        self.end_message(resource)
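
The commented-out guard near the top of this example skips raw captures whose size would exhaust available RAM (the disabled block notes the workflow needs roughly 4x the raw size in memory). If that check were re-enabled, a self-contained version might look like the sketch below; the 24 GB default comes from the disabled block and the helper name is hypothetical.

import os

def exceeds_size_limit(raw_path, max_gb=24):
    """Return True when the raw file is larger than the configured limit (decimal GB)."""
    return os.stat(raw_path).st_size > max_gb * 1000000000

# usage: if exceeds_size_limit(raw_file): log a skip message and return False
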
Example #11
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']
        left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image quality")
        left_qual = getImageQuality(img_left)
        if not self.leftonly:
            right_qual = getImageQuality(img_right)

        left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_nrmac_tiff)
            create_geotiff(np.array([[left_qual, left_qual],[left_qual, left_qual]]), left_bounds,
                           left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % left_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                       left_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if not self.leftonly:
            if (not file_exists(right_nrmac_tiff) or self.overwrite):
                self.log_info(resource, "creating %s" % right_nrmac_tiff)
                create_geotiff(np.array([[right_qual, right_qual],[right_qual, right_qual]]), right_bounds,
                               right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
                self.created += 1
                self.bytes += os.path.getsize(right_nrmac_tiff)
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                self.log_info(resource, "uploading %s" % right_nrmac_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                           right_nrmac_tiff)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        md = {
            "files_created": uploaded_file_ids,
            "left_quality_score": left_qual
        }
        if not self.leftonly:
            md["right_quality_score"] = right_qual
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
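
getImageQuality() above is defined elsewhere in this quality extractor and returns a no-reference quality score that is written into the left_nrmac_tiff / right_nrmac_tiff GeoTIFFs. As a rough, purely illustrative stand-in (not the extractor's actual metric), a simple sharpness score can be computed from the variance of the Laplacian with OpenCV:

import cv2

def simple_sharpness_score(image_path):
    """Return the variance of the Laplacian of the grayscale image; higher generally means sharper."""
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError("could not read %s" % image_path)
    return cv2.Laplacian(gray, cv2.CV_64F).var()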