def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.bin', '_right.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if file_exists(left_tiff) and file_exists(right_tiff):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return CheckMessage.ignore
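Example #1 above, and the variations that follow, all implement the same gating contract for check_message. The sketch below is a minimal, hedged distillation of that shared skeleton, not any one extractor's implementation; it assumes the pyclowder-style CheckMessage.download / CheckMessage.ignore values and the terrautils helpers used throughout these examples, and the '_example.tif' suffix is a placeholder rather than a real sensor filename.

# Minimal sketch of the shared check_message gating pattern (not a specific extractor).
# Assumes CheckMessage, is_latest_file, contains_required_files, download_metadata,
# get_terraref_metadata and get_extractor_metadata are imported as in the examples.
def check_message(self, connector, host, secret_key, resource, parameters):
    # Rulechecker has already vetted this dataset, so skip the local checks
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    # Only act on the newest file event, and only when required inputs are present
    if not is_latest_file(resource):
        return CheckMessage.ignore
    if not contains_required_files(resource, ['_example.tif']):  # placeholder suffix
        return CheckMessage.ignore

    # Download only when TERRA-REF metadata exists but this extractor's record does not
    md = download_metadata(connector, host, secret_key, resource['id'])
    if not get_terraref_metadata(md):
        return CheckMessage.ignore
    if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
        return CheckMessage.ignore  # already processed at this version
    return CheckMessage.download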
Example #2
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for an _ir.bin file before beginning processing
        if not contains_required_files(resource, ['_ir.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                tif = self.sensors.get_sensor_path(timestamp)
                png = tif.replace(".tif", ".png")
                if file_exists(png) and file_exists(tif):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #3
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right TIF file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.force:
                # Check NRMAC score > 15 before proceeding if available
                nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
                if not (nrmac_md and 'left_quality_score' in nrmac_md):
                    self.log_skip(resource,
                                  "NRMAC quality score not available")
                    return CheckMessage.ignore
                elif float(nrmac_md['left_quality_score']) > self.threshold:
                    self.log_skip(
                        resource,
                        "NRMAC quality score %s is above threshold of %s" %
                        (float(
                            nrmac_md['left_quality_score']), self.threshold))
                    return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                                opts=['left'])
                right_enh_tiff = self.sensors.create_sensor_path(
                    timestamp, opts=['right'])
                if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                    if contains_required_files(resource, [
                            os.path.basename(left_enh_tiff),
                            os.path.basename(right_enh_tiff)
                    ]):
                        self.log_skip(
                            resource,
                            "metadata v%s and outputs already exist" %
                            self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(
                            resource,
                            "output files exist but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #4
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if len(resource['files']) >= 23:
            md = download_metadata(connector, host, secret_key, resource['id'])
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                timestamp = resource['name'].split(" - ")[1]
                out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
                out_fullday_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")
                if file_exists(out_fullday_netcdf) and file_exists(
                        out_fullday_csv):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            return CheckMessage.download
        else:
            self.log_skip(resource, "found less than 23 files")
            return CheckMessage.ignore
Example #5
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if not contains_required_files(
                resource,
            ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_fullname = 'swir_netcdf'
        else:
            sensor_fullname = 'vnir_netcdf'

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                out_nc = self.sensors.get_sensor_path(timestamp,
                                                      sensor=sensor_fullname)
                if file_exists(out_nc):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            # See if we can recover it from disk
            if sensor_fullname == 'vnir_netcdf':
                date = timestamp.split("__")[0]
                source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
                    date, timestamp)
                for f in os.listdir(source_dir):
                    if f.endswith("_metadata.json"):
                        self.log_info(resource,
                                      "updating metadata from %s" % f)
                        raw_dsmd = load_json_file(os.path.join(source_dir, f))
                        clean_md = clean_metadata(raw_dsmd, 'VNIR')
                        complete_md = build_metadata(host, self.extractor_info,
                                                     resource['id'], clean_md,
                                                     'dataset')
                        remove_metadata(connector, host, secret_key,
                                        resource['id'])
                        upload_metadata(connector, host, secret_key,
                                        resource['id'], complete_md)
                        return CheckMessage.download
            return CheckMessage.ignore
Example #6
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right TIF file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            # Check for raw_data_source in metadata and resubmit to bin2tif if available...
            md = download_metadata(connector, host, secret_key, resource['id'])
            terra_md = get_terraref_metadata(md)
            if terra_md and 'raw_data_source' in terra_md:
                raw_id = str(terra_md['raw_data_source'].split("/")[-1])
                self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
                submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_nrmac_tiff)) or (
                                not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                    if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                        self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(resource, "output file exists but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #7
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            # Check for a left and right TIF file - skip if not found
            # If we're only processing the left files, don't check for the right file
            needed_files = ['_left.tif']
            if not self.leftonly:
                needed_files.append('_right.tif')
            if not contains_required_files(resource, needed_files):
                self.log_skip(resource, "missing required files")
                return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_mask_tiff)) or \
                   (not self.leftonly and file_exists(left_mask_tiff) and file_exists(right_mask_tiff)):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % \
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
        # Check for other images to create a mask on
        elif not contains_required_files(resource, ['.tif']):
            self.log_skip(resource, "missing required tiff file")
            return CheckMessage.ignore

        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
Example #8
def file_is_image_type(identify_binary, filename, metadata_filename=None):
    """Uses the identify application to generate the MIME type of the file and
       looks for an image MIME type. If a metadata filename is specified, the
       JSON in the file is loaded first and the MIME type is looked for. If
       the metadata filename is not specified, or a MIME type was not found in
       the metadata, the identity application is used.

    Args:
        identify_binary(str): path to the executable which will return a MIME type on an image file
        filename(str): the path to the file to check
        metadata_filename(str): the path to JSON metadata associated with the file in which to look
        for a 'contentType' tag containing the MIME type

    Returns:
        True is returned if the file is a MIME image type
        False is returned upon failure or the file is not a type of image
    """
    logger = logging.getLogger(__name__)

    # Try to determine the file type from its JSON metadata (if it came from the Clowder API)
    try:
        if metadata_filename and file_exists(metadata_filename):
            file_md = load_json_file(metadata_filename)
            if file_md:
                if 'contentType' in file_md:
                    if file_md['contentType'].startswith('image'):
                        return True
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    # Try to determine the file type locally
    try:
        is_image_type = find_image_mime_type(
            subprocess.check_output([identify_binary, "-verbose", filename],
                                    stderr=subprocess.STDOUT))

        if not is_image_type is None:
            return is_image_type
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    return False
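A hedged usage sketch for file_is_image_type follows. The identify path and file names are hypothetical placeholders, and the metadata argument is optional.

# Hypothetical usage of file_is_image_type; all paths below are placeholders.
if file_is_image_type("/usr/bin/identify",
                      "/data/example/rgb_capture_left.tif",
                      metadata_filename="/data/example/rgb_capture_left.json"):
    print("file looks like an image; process it")
else:
    print("not an image (or the check failed); skip it")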
Example #9
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Build list of JSON files
        json_files = []
        for f in resource['files']:
            if f['filename'].endswith("_environmentlogger.json"):
                if f['filepath'].startswith("/home/clowder"):
                    json_files.append(f['filepath'].replace(
                        "/home/clowder", "/home/extractor"))
                else:
                    json_files.append(f['filepath'])
        json_files.sort()

        # Determine full output path
        timestamp = resource['name'].split(" - ")[1]
        out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
        temp_out_full = os.path.join(os.path.dirname(out_fullday_netcdf),
                                     "temp_full.nc")
        temp_out_single = temp_out_full.replace("_full.nc", "_single.nc")
        geo_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")

        if not file_exists(temp_out_full):
            for json_file in json_files:
                self.log_info(
                    resource, "converting %s to netCDF & appending" %
                    os.path.basename(json_file))
                ela.mainProgramTrigger(json_file, temp_out_single)
                cmd = "ncrcat --record_append %s %s" % (temp_out_single,
                                                        temp_out_full)
                subprocess.call([cmd], shell=True)
                os.remove(temp_out_single)

            shutil.move(temp_out_full, out_fullday_netcdf)
            self.created += 1
            self.bytes += os.path.getsize(out_fullday_netcdf)

        # Write out geostreams.csv
        if not file_exists(geo_csv):
            self.log_info(resource, "writing geostreams CSV")
            geo_file = open(geo_csv, 'w')
            geo_file.write(','.join([
                'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
                'timestamp'
            ]) + '\n')
            with Dataset(out_fullday_netcdf, "r") as ncdf:
                streams = set([
                    sensor_info.name
                    for sensor_info in ncdf.variables.values()
                    if sensor_info.name.startswith('sensor')
                ])
                for stream in streams:
                    if stream != "sensor_spectrum":
                        try:
                            memberlist = ncdf.get_variables_by_attributes(
                                sensor=stream)
                            for members in memberlist:
                                data_points = _produce_attr_dict(members)
                                for index in range(len(data_points)):
                                    dp_obj = data_points[index]
                                    if dp_obj["sensor"] == stream:
                                        time_format = "%Y-%m-%dT%H:%M:%S-07:00"
                                        time_point = (datetime.datetime(year=1970, month=1, day=1) + \
                                                      datetime.timedelta(days=ncdf.variables["time"][index])).strftime(time_format)

                                        geo_file.write(','.join([
                                            "Full Field - Environmental Logger",
                                            "(EL) %s" % stream,
                                            str(33.075576),
                                            str(-111.974304), time_point,
                                            host +
                                            ("" if host.endswith("/") else "/"
                                             ) + "datasets/" + resource['id'],
                                            '"%s"' % json.dumps(dp_obj).
                                            replace('"', '""'), timestamp
                                        ]) + '\n')

                        except Exception:
                            self.log_error(
                                resource,
                                "NetCDF attribute not found: %s" % stream)
            geo_file.close()

        # Fetch dataset ID by dataset name if not provided
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            None,
            None,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        ds_files = get_file_list(connector, host, secret_key, target_dsid)
        found_full = False
        found_csv = False
        for f in ds_files:
            if f['filename'] == os.path.basename(out_fullday_netcdf):
                found_full = True
            if f['filename'] == os.path.basename(geo_csv):
                found_csv = True
        if not found_full:
            upload_to_dataset(connector, host, secret_key, target_dsid,
                              out_fullday_netcdf)
        if not found_csv:
            geoid = upload_to_dataset(connector, host, secret_key, target_dsid,
                                      geo_csv)
            self.log_info(resource,
                          "triggering geostreams extractor on %s" % geoid)
            submit_extraction(connector, host, secret_key, geoid,
                              "terra.geostreams")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"output_dataset": target_dsid}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
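The geostreams rows written above convert the netCDF 'time' variable, stored as fractional days since the Unix epoch, into a fixed-offset ISO-style timestamp. A standalone sketch of that conversion, using an arbitrary example value, is shown below.

import datetime

# Sketch of the time conversion used for the geostreams CSV rows above.
# 17500.25 is an arbitrary stand-in for ncdf.variables["time"][index].
days_since_epoch = 17500.25
time_point = (datetime.datetime(year=1970, month=1, day=1) +
              datetime.timedelta(days=days_since_epoch)).strftime("%Y-%m-%dT%H:%M:%S-07:00")
print(time_point)  # 2017-11-30T06:00:00-07:00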
Example #10
    def generateDarkerMosaic(self, connector, host, secret_key, out_dir,
                             out_vrt, out_tif_thumb, out_tif_full,
                             out_tif_medium, parameters, resource):
        # Create dark-pixel mosaic from geotiff list using multipass for darker pixel selection
        created, bytes = 0, 0

        if ((os.path.isfile(out_vrt) and os.path.getsize(out_vrt) == 0)
                or (not os.path.isfile(out_vrt)) or self.overwrite):
            fileidpath = self.remapMountPath(connector,
                                             str(parameters['file_paths']))
            with open(fileidpath) as flist:
                file_path_list = json.load(flist)
            self.log_info(
                resource,
                "processing %s TIFs with dark flag" % len(file_path_list))

            # Write input list to tmp file
            tiflist = "tiflist.txt"
            with open(tiflist, "w") as tifftxt:
                for tpath in file_path_list:
                    filepath = self.remapMountPath(connector, tpath)
                    tifftxt.write("%s\n" % filepath)

            # Create VRT from every GeoTIFF
            self.log_info(resource, "Creating VRT %s..." % out_vrt)
            full_day_to_tiles.createVrtPermanent(out_dir, tiflist, out_vrt)
            created += 1
            bytes += os.path.getsize(out_vrt)

            # Split full tiflist into parts according to split number
            shade.split_tif_list(tiflist, out_dir, self.split)

            # Generate tiles from each split VRT into numbered folders
            shade.create_diff_tiles_set(out_dir, self.split)

            # Choose darkest pixel from each overlapping tile
            unite_tiles_dir = os.path.join(out_dir, 'unite')
            if not os.path.exists(unite_tiles_dir):
                os.mkdir(unite_tiles_dir)
            shade.integrate_tiles(out_dir, unite_tiles_dir, self.split)

            # If any files didn't have overlap, copy individual tile
            shade.copy_missing_tiles(out_dir,
                                     unite_tiles_dir,
                                     self.split,
                                     tiles_folder_name='tiles_left')

            # Create output VRT from overlapped tiles
            shade.create_unite_tiles(unite_tiles_dir, out_vrt)
            created += 1
            bytes += os.path.getsize(out_vrt)

        if (not file_exists(out_tif_thumb)) or self.overwrite:
            self.log_info(resource, "Converting VRT to %s..." % out_tif_thumb)
            subprocess.call(
                "gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 "
                + "-outsize %s%% %s%% %s %s" % (2, 2, out_vrt, out_tif_thumb),
                shell=True)
            created += 1
            bytes += os.path.getsize(out_tif_thumb)

        if not self.thumb:
            if (not file_exists(out_tif_medium)) or self.overwrite:
                self.log_info(resource,
                              "Converting VRT to %s..." % out_tif_medium)
                subprocess.call(
                    "gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 "
                    + "-outsize %s%% %s%% %s %s" %
                    (10, 10, out_vrt, out_tif_medium),
                    shell=True)
                created += 1
                bytes += os.path.getsize(out_tif_medium)

            if self.full and (not file_exists(out_tif_full) or self.overwrite):
                if (not os.path.isfile(out_tif_full)) or self.overwrite:
                    logging.info("Converting VRT to %s..." % out_tif_full)
                    subprocess.call(
                        "gdal_translate -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 "
                        + "%s %s" % (out_vrt, out_tif_full),
                        shell=True)
                    created += 1
                    bytes += os.path.getsize(out_tif_full)

        return (created, bytes)
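Both mosaic generators in these examples shell out to gdal_translate, cropping the stitched VRT to a fixed field window with -projwin (upper-left x, upper-left y, lower-right x, lower-right y) and downsampling with -outsize percentages. A hedged, standalone version of the 10% "medium" conversion, with placeholder file names, might look like this:

import subprocess

# Standalone sketch of the crop + downsample step; fullfield.vrt and the output
# name are placeholders. The -projwin window matches the one used in the examples.
cmd = ("gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES "
       "-projwin -111.9750963 33.0764953 -111.9747967 33.074485715 "
       "-outsize 10% 10% fullfield.vrt fullfield_10pct.tif")
subprocess.call(cmd, shell=True)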
Example #11
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # rulechecker provided some key information for us in parameters
        if type(parameters) is str:
            parameters = json.loads(parameters)
        if 'parameters' in parameters:
            parameters = parameters['parameters']
        if type(parameters) is unicode:
            parameters = json.loads(str(parameters))
        dataset_name = parameters["output_dataset"]
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

        timestamp = dataset_name.split(" - ")[1]

        # Input path will suggest which sensor we are seeing
        sensor_name, sensor_lookup = None, None
        for f in resource['files']:
            if f['filepath'].find("rgb_geotiff") > -1:
                sensor_name = "stereoTop"
                sensor_lookup = "rgb_fullfield"
            elif f['filepath'].find("ir_geotiff") > -1:
                sensor_name = "flirIrCamera"
                sensor_lookup = "ir_fullfield"
            elif f['filepath'].find("laser3d_heightmap") > -1:
                sensor_name = "scanner3DTop"
                sensor_lookup = "laser3d_fullfield"
            if sensor_lookup is not None:
                break

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_name, {})
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output file paths
        out_tif_full = self.sensors.create_sensor_path(
            timestamp, sensor=sensor_lookup,
            opts=[scan_name]).replace(" ", "_")
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
        out_png = out_tif_full.replace(".tif", ".png")
        out_vrt = out_tif_full.replace(".tif", ".vrt")
        out_dir = os.path.dirname(out_vrt)

        # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

        # If outputs already exist, we don't need to do anything else
        found_all = True
        if self.thumb:
            output_files = [out_vrt, out_tif_thumb]
        else:
            output_files = [out_tif_full, out_tif_medium, out_png]
        for output_file in output_files:
            if not file_exists(output_file):
                found_all = False
                break
        if found_all and not self.overwrite:
            if self.thumb:
                self.log_info(
                    resource,
                    "thumb output already exists; triggering terra.geotiff.fieldmosaic_full"
                )
                r = requests.post(
                    "%sapi/%s/%s/extractions?key=%s" %
                    (host, 'datasets', resource['id'], secret_key),
                    headers={"Content-Type": "application/json"},
                    data=json.dumps({
                        "extractor": 'terra.geotiff.fieldmosaic_full',
                        "parameters": parameters
                    }))
                r.raise_for_status()
            else:
                self.log_skip(resource, "all outputs already exist")
            return

        # Perform actual field stitching
        if not self.darker or sensor_lookup != 'rgb_fullfield':
            (nu_created, nu_bytes) = self.generateSingleMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        else:
            (nu_created, nu_bytes) = self.generateDarkerMosaic(
                connector, host, secret_key, out_dir, out_vrt, out_tif_thumb,
                out_tif_full, out_tif_medium, parameters, resource)
        self.created += nu_created
        self.bytes += nu_bytes

        if not self.thumb and os.path.isfile(out_tif_medium):
            # Create PNG thumbnail
            self.log_info(resource, "Converting 10pct to %s..." % out_png)
            cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
            subprocess.call(cmd, shell=True)
            self.created += 1
            self.bytes += os.path.getsize(out_png)

        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s" %
            (season_name, experiment_name,
             self.sensors.get_display_name(sensor=sensor_lookup),
             timestamp[:4], timestamp[5:7]))

        # Get dataset ID or create it, creating parent collections as needed
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            self.sensors.get_display_name(sensor=sensor_lookup),
            timestamp[:4],
            timestamp[5:7],
            leaf_ds_name=dataset_name)

        # Upload full field image to Clowder
        content = {
            "comment":
            "This stitched image is computed based on an assumption that the scene is planar. \
                There are likely to be small offsets near the boundary of two images anytime there are plants \
                at the boundary (because those plants are higher than the ground plane), or where the dirt is \
                slightly higher or lower than average.",
            "file_ids": parameters["file_paths"]
        }

        # If we newly created these files, upload to Clowder
        if self.thumb:
            generated_files = [out_tif_thumb]
        else:
            generated_files = [out_tif_medium, out_tif_full, out_png]
        for checked_file in generated_files:
            if os.path.isfile(checked_file):
                found_in_dest = check_file_in_dataset(connector, host,
                                                      secret_key, target_dsid,
                                                      checked_file)
                #, replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
                if not found_in_dest:
                    id = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           checked_file)
                    meta = build_metadata(host, self.extractor_info, id,
                                          content, 'file')
                    upload_metadata(connector, host, secret_key, id, meta)

                    if checked_file == out_tif_full:
                        # Trigger downstream extractions on full resolution
                        if sensor_lookup == 'ir_fullfield':
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.multispectral.meantemp")
                        elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith(
                                "_mask.tif"):
                            submit_extraction(connector, host, secret_key, id,
                                              "terra.stereo-rgb.canopycover")

        if self.thumb:
            # TODO: Add parameters support to pyclowder submit_extraction()
            self.log_info(resource,
                          "triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" %
                              (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({
                                  "extractor":
                                  'terra.geotiff.fieldmosaic_full',
                                  "parameters": parameters
                              }))
            r.raise_for_status()

        self.end_message(resource)
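Because pyclowder's submit_extraction does not currently accept parameters (see the TODO above), the example posts directly to the Clowder extractions endpoint. A hedged helper capturing that call, with the URL shape and request body taken from the example and all identifiers supplied by the caller as placeholders, might look like:

import json
import requests

# Sketch of the manual extraction-submission POST used above; host, dataset_id,
# secret_key and extractor are placeholders supplied by the caller.
def submit_extraction_with_parameters(host, secret_key, dataset_id, extractor, parameters):
    url = "%sapi/datasets/%s/extractions?key=%s" % (host, dataset_id, secret_key)
    resp = requests.post(url,
                         headers={"Content-Type": "application/json"},
                         data=json.dumps({"extractor": extractor, "parameters": parameters}))
    resp.raise_for_status()
    return resp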
Example #12
    def process_message(self, connector, host, secret_key, resource, parameters):

        super(rgbEnhancementExtractor, self).process_message(connector, host, secret_key,
                                                             resource, parameters)

        self.start_message(resource)

        # Get left/right files and metadata
        process_files = []
        if not self.get_terraref_metadata is None:
            process_files = find_terraref_files(resource)
        else:
            process_files = find_image_files(self.args.identify_binary, resource,
                                             self.file_infodata_file_ending)

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user,
                                    self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'],
                                                           self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        # Prepare for processing files
        timestamp = timestamp_to_terraref(self.find_timestamp(resource['dataset_info']['name']))
        target_dsid = resource['id']
        uploaded_file_ids = []
        ratios = []

        try:
            for one_file in process_files:

                mask_source = one_file

                # Make sure the source image is in the correct EPSG space
                epsg = get_epsg(one_file)
                if epsg != self.default_epsg:
                    self.log_info(resource, "Reprojecting from " + str(epsg) +
                                  " to default " + str(self.default_epsg))
                    _, tmp_name = tempfile.mkstemp()
                    src = gdal.Open(one_file)
                    gdal.Warp(tmp_name, src, dstSRS='EPSG:'+str(self.default_epsg))
                    mask_source = tmp_name

                # Get the bounds of the image to see if we can process it. Also get the mask filename
                rgb_mask_tif, bounds = self.get_maskfilename_bounds(mask_source, timestamp)

                if bounds is None:
                    self.log_skip(resource, "Skipping non-georeferenced image: " + \
                                                                    os.path.basename(one_file))
                    if mask_source != one_file:
                        os.remove(mask_source)
                    continue

                if not file_exists(rgb_mask_tif) or self.overwrite:
                    self.log_info(resource, "creating %s" % rgb_mask_tif)

                    mask_ratio, mask_rgb = gen_cc_enhanced(mask_source)
                    ratios.append(mask_ratio)

                    # Bands must be reordered to avoid swapping R and B
                    mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_BGR2RGB)

                    create_geotiff(mask_rgb, bounds, rgb_mask_tif, None, False, self.extractor_info,
                                   self.get_terraref_metadata)
                    compress_geotiff(rgb_mask_tif)

                    # Remove any temporary file
                    if mask_source != one_file:
                        os.remove(mask_source)

                    self.created += 1
                    self.bytes += os.path.getsize(rgb_mask_tif)

                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      rgb_mask_tif, remove=self.overwrite)
                if not found_in_dest:
                    self.log_info(resource, "uploading %s" % rgb_mask_tif)
                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, rgb_mask_tif)
                    uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") +
                                             "files/" + fileid)

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            if not self.get_terraref_metadata is None:
                ratios_len = len(ratios)
                left_ratio = (ratios[0] if ratios_len > 0 else None)
                right_ratio = (ratios[1] if ratios_len > 1 else None)
                md = {
                    "files_created": uploaded_file_ids
                }
                if not left_ratio is None:
                    md["left_mask_ratio"] = left_ratio
                if not self.leftonly and not right_ratio is None:
                    md["right_mask_ratio"] = right_ratio
                extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
                self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
                remove_metadata(connector, host, secret_key, resource['id'],
                                self.extractor_info['name'])
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        finally:
            # Signal the end of message processing and restore changed variables. Note that any
            # early returns above must also restore these variables
            if not sensor_old_base is None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
            self.end_message(resource)
Example #13
    def generateSingleMosaic(self, connector, host, secret_key, out_dir,
                             out_vrt, out_tif_thumb, out_tif_full,
                             out_tif_medium, parameters, resource):
        # Create simple mosaic from geotiff list
        created, bytes = 0, 0

        #if (os.path.isfile(out_vrt) and os.path.getsize(out_vrt) == 0) or (not os.path.isfile(out_vrt)) or self.overwrite:
        fileidpath = self.remapMountPath(connector,
                                         str(parameters['file_paths']))
        with open(fileidpath) as flist:
            file_path_list = json.load(flist)
        self.log_info(
            resource,
            "processing %s TIFs without dark flag" % len(file_path_list))

        # Write input list to tmp file
        tiflist = "tiflist.txt"
        with open(tiflist, "w") as tifftxt:
            for tpath in file_path_list:
                filepath = self.remapMountPath(connector, tpath)
                tifftxt.write("%s\n" % filepath)

        if (self.thumb and ((not file_exists(out_vrt)) or self.overwrite)) or (
                not self.thumb and (not file_exists(out_vrt))):
            # Create VRT from every GeoTIFF
            self.log_info(resource, "Creating VRT %s..." % out_vrt)
            if out_vrt.endswith("_mask.vrt"):
                full_day_to_tiles.createVrtPermanent(out_dir,
                                                     tiflist,
                                                     out_vrt,
                                                     alpha=True)
            else:
                full_day_to_tiles.createVrtPermanent(out_dir, tiflist, out_vrt)
            os.remove(tiflist)
            created += 1
            bytes += os.path.getsize(out_vrt)

        if (not file_exists(out_tif_thumb)) or self.overwrite:
            # Omit _mask.vrt from 2%
            if not (out_vrt.endswith('_mask.vrt')):
                self.log_info(resource,
                              "Converting VRT to %s..." % out_tif_thumb)
                cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                      "-outsize %s%% %s%% %s %s" % (2, 2, out_vrt, out_tif_thumb)
                subprocess.call(cmd, shell=True)
                created += 1
                bytes += os.path.getsize(out_tif_thumb)

        if not self.thumb:
            if (not file_exists(out_tif_medium)) or self.overwrite:
                # Omit _mask.vrt and _nrmac.vrt from 10%
                if not (out_vrt.endswith('_mask.vrt')
                        or out_vrt.endswith('_nrmac.vrt')):
                    self.log_info(resource,
                                  "Converting VRT to %s..." % out_tif_medium)
                    cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                          "-outsize %s%% %s%% %s %s" % (10, 10, out_vrt, out_tif_medium)
                    subprocess.call(cmd, shell=True)
                    created += 1
                    bytes += os.path.getsize(out_tif_medium)

            if (not file_exists(out_tif_full)) or self.overwrite:
                # Omit _nrmac.vrt from 100%
                if not out_vrt.endswith('_nrmac.vrt'):
                    logging.info("Converting VRT to %s..." % out_tif_full)
                    cmd = "gdal_translate -co COMPRESS=LZW -co BIGTIFF=YES -projwin -111.9750963 33.0764953 -111.9747967 33.074485715 " + \
                          "%s %s" % (out_vrt, out_tif_full)
                    subprocess.call(cmd, shell=True)
                    created += 1
                    bytes += os.path.getsize(out_tif_full)

        return (created, bytes)
Example #14
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context(
        )

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # If a plot-name column was specified, make sure it exists in
                # the shapefile
                column_names = shape_table.field_names
                if not plot_name_idx is None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            resource,
                            "Shapefile data does not have specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                                (one_name.find('name') >= 0)
                                or (one_name.find('id') >= 0)):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        resource,
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Check for geographic overlap and skip if there is none
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate system before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and determine overwrite
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if (file_exists(out_file) and not self.overwrite):
                        # The file exists and we don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment":
                            "Clipped from shapefile " +
                            os.path.basename(shapefile),
                            "imageName":
                            image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal end of processing message and restore changed variables. Be sure to restore
            # changed variables above with early returns
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
Exemple #15
0
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # clean tmp directory from any potential failed previous runs
        flist = os.listdir("/tmp")
        for f in flist:
            try:
                os.remove(os.path.join("/tmp", f))
            except OSError:
                # Ignore anything we cannot remove (e.g. directories or files owned by others)
                pass
        """ if file is above configured limit, skip it
		max_gb = 24 # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
		for fname in resource['local_paths']:
			if fname.endswith('raw'): rawfile = fname
		rawsize = os.stat(rawfile).st_size
		if rawsize > max_gb * 1000000000:
			self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
			return False
		"""

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
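        # The incoming dataset name is assumed to follow the "<sensor> - <timestamp>" convention
        # (as used by build_dataset_hierarchy_crawl below), so the second " - " field is the timestamp.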
        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_rawname = 'SWIR'
            sensor_fullname = 'swir_netcdf'
            soil_mask = None
        else:
            sensor_rawname = 'VNIR'
            sensor_fullname = 'vnir_netcdf'
            # Check for corresponding soil mask to include in workflow.sh if available
            soil_mask = self.sensors.get_sensor_path(timestamp,
                                                     sensor='vnir_soil_masks',
                                                     opts=['soil_mask'])
        out_nc = self.sensors.create_sensor_path(timestamp,
                                                 sensor=sensor_fullname)
        xps_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['xps'])
        ind_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['ind'])
        csv_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname.replace(
                                                    "_netcdf", "_traits"))

        raw_file, terra_md_full = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
            elif fname.endswith('raw'):
                raw_file = fname
        if None in [raw_file, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_rawname, terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        print_name = self.sensors.get_display_name(sensor=sensor_fullname)
        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
            (season_name, experiment_name, print_name, timestamp[:4],
             timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            print_name,
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        uploaded_file_ids = []

        # Perform actual processing
        if (not file_exists(out_nc)) or self.overwrite:
            """TODO: OLD AND NOT USED
			self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
			if soil_mask and file_exists(soil_mask):
				# If soil mask exists, we can generate an _ind indices file
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
										  "-m", soil_mask, "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			else:
				# Otherwise we cannot, and need to trigger soilmask extractor and circle back later
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
											  "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			if returncode != 0:
				raise ValueError('script encountered an error')
			"""

            self.log_info(resource,
                          'invoking python calibration to create: %s' % out_nc)
            create_empty_netCDF(raw_file, out_nc)
            self.log_info(resource, 'applying calibration to: %s' % out_nc)
            apply_calibration(raw_file, out_nc)
            self.log_info(resource, '...done applying calibration to %s' % raw_file)

            found_in_dest = check_file_in_dataset(connector,
                                                  host,
                                                  secret_key,
                                                  target_dsid,
                                                  out_nc,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, out_nc)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(out_nc)

            # TODO: Still compatible?
            #if not soil_mask:
            #	self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
            #	submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

            # TODO: Send output to BETYdb
            """
			# Send indices to betyDB
			if file_exists(ind_file):
				# TODO: Use ncks to trim ind_file to plots before this step
				plot_no = 'Full Field'
	
				with Dataset(ind_file, "r") as netCDF_handle:
					ndvi = netCDF_handle.get_variables_by_attributes(standard_name='normalized_difference_chlorophyll_index_750_705')
					NDVI705 = ndvi[0].getValue().ravel()[0]
	
					# TODO: Map the remaining ~50 variables in BETY to create indices file
					# TODO: In netCDF header,
	
					csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
								 'citation_author,citation_year,citation_title,method'
					csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
							   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
									timestamp, NDVI705, plot_no)
					with open(csv_file, 'w') as c:
						c.write(csv_header+'\n'+csv_vals)
	
				# TODO: Send this CSV to betydb & geostreams extractors instead
				submit_traits(csv_file, bety_key=self.bety_key)
			"""

        self.end_message(resource)
Exemple #16
0
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
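    # --- Sketch (not part of the original extractors): the dataset hierarchy convention that the
    # build_dataset_hierarchy_crawl() calls above and below rely on. The helper name is
    # illustrative; the real extractors pass these components inline.
    @staticmethod
    def _hierarchy_parts(season_name, experiment_name, display_name, timestamp):
        """Return season/experiment/sensor/year/month/day/leaf components, assuming the
        timestamp begins with an ISO date (YYYY-MM-DD...)."""
        return (season_name, experiment_name, display_name,
                timestamp[:4], timestamp[5:7], timestamp[8:10],
                display_name + ' - ' + timestamp)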
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                            opts=['left'])
        right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['right'])
        uploaded_file_ids = []

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
            EI = getEnhancedImage(img_left)
            create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              left_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not file_exists(right_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
            EI = getEnhancedImage(img_right)
            create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(right_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              right_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       right_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        remove_metadata(connector, host, secret_key, target_dsid,
                        self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

        self.end_message(resource)
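    # --- Sketch (not part of the original extractor): the "upload only if missing" pattern that
    # the blocks above repeat for each output file. It assumes check_file_in_dataset() and
    # upload_to_dataset() behave exactly as in the calls above; the helper name is illustrative.
    def _upload_if_missing(self, connector, host, secret_key, dsid, path, overwrite=False):
        """Upload path to dataset dsid unless it is already there; return its file URL or None."""
        found = check_file_in_dataset(connector, host, secret_key, dsid, path, remove=overwrite)
        if found and not overwrite:
            return None
        fileid = upload_to_dataset(connector, host, self.clowder_user,
                                   self.clowder_pass, dsid, path)
        return host + ("" if host.endswith("/") else "/") + "files/" + fileid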
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product if necessary
        target_md = download_metadata(connector, host, secret_key, target_dsid)
        if not get_extractor_metadata(target_md, self.extractor_info['name']):
            self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
            remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
            terra_md_trim = get_terraref_metadata(all_dsmd)
            if updated_experiment is not None:
                terra_md_trim['experiment_metadata'] = updated_experiment
            terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
            level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
            upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        try:
            left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
            gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
            right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
            gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
        except KeyError:
            self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return

        if (not file_exists(left_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % left_tiff)
            left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
            create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if (not file_exists(right_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % right_tiff)
            right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
            create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            try:
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
            except Exception:
                self.log_info(resource, "problem uploading extractor metadata...")

        self.end_message(resource)
Exemple #19
0
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Process the message requesting the ODM extractor to run

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """

        # Start of message processing
        self.start_message(resource)
        TerrarefExtractor.process_message(self, connector, host, secret_key,
                                          resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Array of files to upload once processing is done
        self.files_to_upload = []

        # Our cache of files to upload
        self.cache_folder = tempfile.mkdtemp()

        # We are only handling one sensor type here. ODM generates additional sensor outputs
        # that may not be available for upload; we handle those as we see them in upload_file()
        # above
        sensor_type = "rgb"

        # Initialize more local variables
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

        # Setup overrides and get the restore function
        restore_fn = self.setup_overrides(host, secret_key, resource)
        if not restore_fn:
            self.end_message(resource)
            return

        try:
            # Get the best timestamp
            timestamp = timestamp_to_terraref(
                self.find_timestamp(resource,
                                    resource['dataset_info']['name']))
            season_name, experiment_name, _ = self.get_season_and_experiment(
                timestamp, self.sensor_name)

            # Generate the file names
            out_tif_full = self.sensors.get_sensor_path(
                timestamp, opts=[sensor_type, scan_name]).replace(" ", "_")
            out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
            out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
            out_png = out_tif_medium.replace(".tif", ".png")
            out_dir = os.path.dirname(out_tif_full)

            # Generate dictionary of sensor output folders and file names
            sensor_maps = {
                sensor_type: {
                    "dir": out_dir,
                    "name": os.path.basename(out_tif_full)
                }
            }
            fsm = self.filename_sensor_maps
            for one_map in fsm:
                cur_sensor = fsm[one_map]
                if cur_sensor not in sensor_maps:
                    sensor_path = self.sensors.get_sensor_path(
                        timestamp,
                        sensor=cur_sensor,
                        opts=[cur_sensor, scan_name]).replace(" ", "_")

                    sensor_maps[cur_sensor] = {
                        "dir": os.path.dirname(sensor_path),
                        "name": os.path.basename(sensor_path)
                    }
            self.sensor_maps = sensor_maps

            # Only generate what we need to by checking files on disk
            thumb_exists, med_exists, full_exists, png_exists, only_png = (
                False, False, False, False, False)

            if file_exists(out_tif_thumb):
                thumb_exists = True
            if file_exists(out_tif_medium):
                med_exists = True
            if file_exists(out_tif_full):
                full_exists = True
            if file_exists(out_png):
                png_exists = True
            if thumb_exists and med_exists and full_exists and not self.overwrite_ok:
                if png_exists:
                    self.log_skip(resource, "all outputs already exist")
                    return
                else:
                    self.log_info(resource, "all outputs already exist (10% PNG thumbnail must" \
                                            " still be generated)")
                    only_png = True

            # If we need the whole set of files, create them
            if not only_png:
                # Override the output file name. We don't save anything here because we'll override
                # it the next time through
                self.args.orthophotoname = os.path.splitext(
                    os.path.basename(out_tif_full))[0]

                # Run the stitch process
                OpenDroneMapStitch.process_message(self, connector, host,
                                                   secret_key, resource,
                                                   parameters)

                # Look up the name of the full sized orthomosaic
                basename = os.path.basename(out_tif_full)
                srcname = None
                for f in self.files_to_upload:
                    if f["dest_name"] == basename:
                        srcname = os.path.join(self.cache_folder,
                                               f["source_name"])
                        break

                # Generate other file sizes from the original orthomosaic
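                # (gdal_translate -outsize below: 10% for the "_10pct" copy, 2% for the thumbnail)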
                if srcname and not file_exists(out_tif_medium):
                    self.log_info(
                        resource,
                        "Converting orthomosaic to %s..." % out_tif_medium)
                    outname = os.path.join(self.cache_folder,
                                           os.path.basename(out_tif_medium))
                    cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (
                        10, 10, srcname, outname)
                    subprocess.call(cmd, shell=True)

                if srcname and not file_exists(out_tif_thumb):
                    self.log_info(
                        resource,
                        "Converting orthomosaic to %s..." % out_tif_thumb)
                    outname = os.path.join(self.cache_folder,
                                           os.path.basename(out_tif_thumb))
                    cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (
                        2, 2, srcname, outname)
                    subprocess.call(cmd, shell=True)

            # We're here due to possibly needing the PNG Thumbnail
            srcname = os.path.join(self.cache_folder,
                                   os.path.basename(out_tif_medium))
            if (only_png or not png_exists) and file_exists(srcname):
                # Create PNG thumbnail
                self.log_info(resource, "Converting 10pct to %s..." % out_png)
                outname = os.path.join(self.cache_folder,
                                       os.path.basename(out_png))
                cmd = "gdal_translate -of PNG %s %s" % (srcname, outname)
                subprocess.call(cmd, shell=True)

            # Get dataset ID or create it, creating parent collections as needed
            leaf_ds_name = self.sensors.get_display_name() + ' - ' + timestamp
            ds_exists = get_datasetid_by_name(host, secret_key, leaf_ds_name)
            target_dsid = build_dataset_hierarchy_crawl(
                host,
                secret_key,
                self.clowder_user,
                self.clowder_pass,
                self.clowderspace,
                season_name,
                experiment_name,
                self.sensors.get_display_name(),
                timestamp[:4],
                timestamp[5:7],
                timestamp[8:10],
                leaf_ds_name=leaf_ds_name)

            if (self.overwrite_ok
                    or not ds_exists) and self.experiment_metadata:
                self.update_dataset_extractor_metadata(
                    connector, host, secret_key, target_dsid,
                    prepare_pipeline_metadata(self.experiment_metadata),
                    self.extractor_info['name'])

            # Store our dataset mappings for possible later use
            self.sensor_dsid_map = {sensor_type: target_dsid}

            # Upload full field image to Clowder
            file_ids = []
            if "files" in resource:
                for one_file in resource["files"]:
                    file_ids.append(one_file.get("id", ""))
            content = {
                "comment": "This stitched file is computed using OpenDroneMap. Change the" \
                           " parameters in extractors-opendronemap.txt to change the results.",
                "source_file_ids": ", ".join(file_ids)
            }

            # If we newly created these files, upload to Clowder
            file_name = os.path.basename(out_tif_thumb)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not thumb_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            file_name = os.path.basename(out_tif_medium)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not med_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            file_name = os.path.basename(out_png)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not png_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            # The main orthomosaic is already getting uploaded, but we must make sure its path
            # is correct
            srcname = os.path.basename(out_tif_full).lower()
            for one_file in self.files_to_upload:
                file_name = os.path.basename(one_file["dest_name"]).lower()
                if file_name == srcname:
                    one_file["dest_path"] = os.path.dirname(out_tif_full)
                    break

            # This function uploads the files into their appropriate datasets
            self.perform_uploads(connector, host, secret_key, resource,
                                 target_dsid, content, season_name,
                                 experiment_name, timestamp)

            # Clean up all destination folders, skipping over ones that are in our "base" path
            # (we want to keep those)
            base = self.sensors.base
            if not self.cache_folder.startswith(base):
                check_delete_folder(self.cache_folder)
            for sp in self.sensor_maps:
                if not self.sensor_maps[sp]["dir"].startswith(base):
                    check_delete_folder(self.sensor_maps[sp]["dir"])

        finally:
            # We are done, restore fields we've modified (also be sure to restore fields in the
            # early returns in the code above)
            if restore_fn:
                restore_fn()
            self.end_message(resource)
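    # --- Sketch (not part of the original extractor): the override/restore pattern used above.
    # setup_overrides() and the restore function are assumed to behave as in this extractor;
    # running restore_fn in a finally block guarantees credentials are restored even on errors.
    def _with_overrides(self, host, secret_key, resource, work):
        restore_fn = self.setup_overrides(host, secret_key, resource)
        if not restore_fn:
            self.end_message(resource)
            return
        try:
            work()
        finally:
            restore_fn()
            self.end_message(resource)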
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
        right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
        uploaded_file_ids = []
        right_ratio, left_ratio = 0, 0

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])
        #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")
        if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_mask_tiff)

            #if qual_md and 'left_quality_score' in qual_md:
            #left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
            left_ratio, left_rgb = gen_cc_enhanced(img_left)

            if left_ratio is not None and left_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None,
                               False, self.extractor_info, terra_md_full)
                compress_geotiff(left_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(left_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any stale copy on disk
                if file_exists(left_rgb_mask_tiff):
                    self.log_info(
                        resource, "mask generation failed; deleting stale %s" %
                        left_rgb_mask_tiff)
                    os.remove(left_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                              target_dsid, left_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_mask_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not self.leftonly:
            if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:

                right_ratio, right_rgb = gen_cc_enhanced(img_right)

                if right_ratio is not None and right_rgb is not None:
                    # Bands must be reordered to avoid swapping R and B
                    right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                    create_geotiff(right_rgb, right_bounds,
                                   right_rgb_mask_tiff, None, False,
                                   self.extractor_info, terra_md_full)
                    compress_geotiff(right_rgb_mask_tiff)
                    self.created += 1
                    self.bytes += os.path.getsize(right_rgb_mask_tiff)
                else:
                    # If the masked version was not generated, delete any stale copy on disk
                    if file_exists(right_rgb_mask_tiff):
                        self.log_info(
                            resource, "mask generation failed; deleting stale %s" %
                            right_rgb_mask_tiff)
                        os.remove(right_rgb_mask_tiff)

            found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                                  target_dsid,
                                                  right_rgb_mask_tiff)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_rgb_mask_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            md = {
                "files_created": uploaded_file_ids,
                "left_mask_ratio": left_ratio
            }
            if not self.leftonly:
                md["right_mask_ratio"] = right_ratio
            extractor_md = build_metadata(host, self.extractor_info,
                                          target_dsid, md, 'dataset')
            self.log_info(resource,
                          "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'],
                            self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'],
                            extractor_md)

        self.end_message(resource)
Exemple #21
0
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        terra_md_full, spatial_meta = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                sensor_name = "ir_geotiff"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                filename = os.path.basename(f)
                if f.endswith("_left.tif"): side = "left"
                else:                       side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
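                    # plot_bounds is a GeoJSON-style string; yaml.safe_load parses it (JSON is
                    # a subset of YAML) before it is converted to a clip bounds tuple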
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this file has already been merged into the plot output
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Exemple #22
0
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']
        left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image quality")
        left_qual = getImageQuality(img_left)
        if not self.leftonly:
            right_qual = getImageQuality(img_right)

        left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_nrmac_tiff)
            create_geotiff(np.array([[left_qual, left_qual],[left_qual, left_qual]]), left_bounds,
                           left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % left_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                       left_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if not self.leftonly:
            if (not file_exists(right_nrmac_tiff) or self.overwrite):
                self.log_info(resource, "creating %s" % right_nrmac_tiff)
                create_geotiff(np.array([[right_qual, right_qual],[right_qual, right_qual]]), right_bounds,
                               right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
                self.created += 1
                self.bytes += os.path.getsize(right_nrmac_tiff)
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                self.log_info(resource, "uploading %s" % right_nrmac_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                           right_nrmac_tiff)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        md = {
            "files_created": uploaded_file_ids,
            "left_quality_score": left_qual
        }
        if not self.leftonly:
            md["right_quality_score"] = right_qual
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)