def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.bin', '_right.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if file_exists(left_tiff) and file_exists(right_tiff):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return CheckMessage.ignore
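For context, every check_message/process_message shown in these examples is a method on a pyclowder extractor class. Below is a minimal, hypothetical sketch (not taken from any of the examples) of how such methods plug into a pyclowder Extractor subclass; Extractor, CheckMessage, setup() and start() are pyclowder APIs, while the class name and logic are purely illustrative.

from pyclowder.extractors import Extractor
from pyclowder.utils import CheckMessage


class MinimalExtractor(Extractor):
    def __init__(self):
        Extractor.__init__(self)
        self.setup()  # parse standard pyclowder command-line/environment settings

    def check_message(self, connector, host, secret_key, resource, parameters):
        # Returning CheckMessage.download asks pyclowder to fetch the dataset files
        # before calling process_message; CheckMessage.ignore drops the message.
        return CheckMessage.download

    def process_message(self, connector, host, secret_key, resource, parameters):
        # resource['local_paths'] lists the files downloaded for this message
        for path in resource.get('local_paths', []):
            print("would process %s" % path)


if __name__ == "__main__":
    MinimalExtractor().start()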
Example #2
    def submit_missing_timestamps(sensor_name, target, date):
        sensordef = count_defs[sensor_name]
        targetdef = sensordef[target]
        extractorname = targetdef["extractor"]
        submitted = []
        notfound = []

        if "parent" in targetdef:
            # Count expected parent counts & actual current progress counts from filesystem
            parentdef = sensordef[targetdef["parent"]]
            parent_dir = os.path.join(parentdef["path"], date)
            target_dir = os.path.join(targetdef["path"], date)
            parent_timestamps = os.listdir(parent_dir)
            if os.path.isdir(target_dir):
                target_timestamps = os.listdir(target_dir)
            else:
                target_timestamps = []

            disp_name = Sensors("", "ua-mac").get_display_name(targetdef["parent"])
            missing = list(set(parent_timestamps)-set(target_timestamps))
            for ts in missing:
                if ts.find("-") > -1 and ts.find("__") > -1:
                    dataset_name = disp_name+" - "+ts
                    raw_dsid = get_dsid_by_name(dataset_name)
                    if raw_dsid:
                        submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, extractorname)
                        submitted.append({"name": dataset_name, "id": raw_dsid})
                    else:
                        notfound.append({"name": dataset_name})

        return json.dumps({
            "extractor": extractorname,
            "datasets submitted": submitted,
            "datasets not found": notfound
        })
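submit_missing_timestamps assumes a count_defs dictionary that maps each sensor to target and parent definitions carrying "extractor", "parent", and "path" keys. A hypothetical entry showing only the shape the function expects (the paths and extractor name here are illustrative, not the real definitions):

count_defs = {
    "stereoTop": {
        "raw": {
            "path": "/home/clowder/sites/ua-mac/raw_data/stereoTop"
        },
        "bin2tif": {
            "parent": "raw",
            "path": "/home/clowder/sites/ua-mac/Level_1/rgb_geotiff",
            "extractor": "terra.stereo-rgb.bin2tif"
        }
    }
}

# e.g. print(submit_missing_timestamps("stereoTop", "bin2tif", "2018-05-01"))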
Example #3
    def submit_rulecheck(sensor_name, target, date):
        sensordef = count_defs[sensor_name]
        targetdef = sensordef[target]
        submitted = []

        s = Sensors("", "ua-mac")

        if "parent" in targetdef:
            target_dir = os.path.join(sensordef[targetdef["parent"]]["path"], date)
            target_timestamps = os.listdir(target_dir)

            disp_name = s.get_display_name(targetdef["parent"])

            for ts in target_timestamps:
                if ts.find("-") > -1 and ts.find("__") > -1: # TODO: and os.listdir(os.path.join(target_dir, ts)):
                    # Get first populated timestamp for the date that has a Clowder ID
                    dataset_name = disp_name+" - "+ts
                    raw_dsid = get_dsid_by_name(dataset_name)
                    if raw_dsid:
                        # Submit associated Clowder ID to rulechecker
                        submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, "ncsa.rulechecker.terra")
                        submitted.append({"name": dataset_name, "id": raw_dsid})
                        break

        return json.dumps({
            "extractor": "ncsa.rulechecker.terra",
            "datasets submitted": submitted
        })
Example #4
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            # Check for raw_data_source in metadata and resubmit to bin2tif if available...
            md = download_metadata(connector, host, secret_key, resource['id'])
            terra_md = get_terraref_metadata(md)
            if 'raw_data_source' in terra_md:
                raw_id = str(terra_md['raw_data_source'].split("/")[-1])
                self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
                submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_nrmac_tiff)) or (
                                not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                    if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                        self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(resource, "output file exists but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #5
    def submit_missing_timestamps_from_rulechecker(sensor_name, target, date):
        sensordef = count_defs[sensor_name]
        targetdef = sensordef[target]
        extractorname = targetdef["extractor"]
        submitted = []
        notfound = []

        if "parent" in targetdef:
            # Count expected parent counts from filesystem
            parentdef = sensordef[targetdef["parent"]]
            parent_dir = os.path.join(parentdef["path"], date)
            parent_timestamps = os.listdir(parent_dir)

            # Count actual current progress counts from PSQL
            psql_conn = connect_to_psql()

            target_timestamps = []
            query_string = targetdef["query_list"] % date
            curs = psql_conn.cursor()
            curs.execute(query_string)
            for result in curs:
                target_timestamps.append(result[0].split("/")[-2])

            disp_name = Sensors("", "ua-mac").get_display_name(targetdef["parent"])
            missing = list(set(parent_timestamps)-set(target_timestamps))
            for ts in missing:
                if ts.find("-") > -1 and ts.find("__") > -1:
                    dataset_name = disp_name+" - "+ts
                    raw_dsid = get_dsid_by_name(dataset_name)
                    if raw_dsid:
                        submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, extractorname)
                        submitted.append({"name": dataset_name, "id": raw_dsid})
                    else:
                        notfound.append({"name": dataset_name})

        return json.dumps({
            "extractor": extractorname,
            "datasets submitted": submitted,
            "datasets not found": notfound
        })
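Example #5 additionally depends on a connect_to_psql() helper and a per-target "query_list" SQL template. A minimal sketch of such a helper, assuming psycopg2 with connection settings read from the environment (the variable names and defaults are placeholders, not the real configuration):

import os

import psycopg2


def connect_to_psql():
    # Placeholder connection parameters; the real script supplies its own.
    return psycopg2.connect(
        host=os.environ.get("PSQL_HOST", "localhost"),
        dbname=os.environ.get("PSQL_DB", "extractor_counts"),
        user=os.environ.get("PSQL_USER", "clowder"),
        password=os.environ.get("PSQL_PASS", ""))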
Example #6
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        terra_md_full, spatial_meta = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
                else:
                    spatial_meta = None
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                sensor_name = "ir_geotiff"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                filename = os.path.basename(f)
                if f.endswith("_left.tif"): side = "left"
                else:                       side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this file has already been merged into the output
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example #7
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product if necessary
        target_md = download_metadata(connector, host, secret_key, target_dsid)
        if not get_extractor_metadata(target_md, self.extractor_info['name']):
            self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
            remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
            terra_md_trim = get_terraref_metadata(all_dsmd)
            if updated_experiment is not None:
                terra_md_trim['experiment_metadata'] = updated_experiment
            terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
            level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
            upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        try:
            left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
            gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
            right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
            gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
        except KeyError:
            self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return

        if (not file_exists(left_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % left_tiff)
            left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
            create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if (not file_exists(right_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % right_tiff)
            right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
            create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            try:
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
            except Exception:
                self.log_info(resource, "problem uploading extractor metadata...")

        self.end_message(resource)
Example #8
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message(resource)

		sensor_type, timestamp = resource['name'].split(" - ")

		# First, re-check metadata to verify it hasn't been added in the meantime
		ds_md = download_metadata(connector, host, secret_key, resource['id'])
		terra_md = get_terraref_metadata(ds_md)
		if terra_md:
			self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
			return

		# These datasets do not have TERRA md
		uncleanables = ["Full Field"]
		if sensor_type in uncleanables:
			self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
			return

		# For these datasets, we must get TERRA md from raw_data source
		lv1_types = {"RGB GeoTIFFs": "stereoTop",
					 "Thermal IR GeoTIFFs": "flirIrCamera"}
		if sensor_type in lv1_types:
			raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
		else:
			# Search for metadata.json source file
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
		source_dir = self.remapMountPath(connector, source_dir)

		if self.delete:
			# Delete all existing metadata from this dataset
			self.log_info(resource, "Deleting existing metadata")
			delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

		# TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
		if sensor_type == "scanner3DTop":
			source_dir = source_dir.replace("Level_1", "raw_data")

		self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
		if os.path.isdir(source_dir):
			md_file = None
			for f in os.listdir(source_dir):
				if f.endswith("metadata.json"):
					md_file = os.path.join(source_dir, f)
			if md_file:
				self.log_info(resource, "Found metadata.json; cleaning")
				md_json = clean_metadata(load_json_file(md_file), sensor_type)
				format_md = {
					"@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
								 {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
					"content": md_json,
					"agent": {
						"@type": "cat:user",
						"user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
					}
				}
				self.log_info(resource, "Uploading cleaned metadata")
				upload_metadata(connector, host, secret_key, resource['id'], format_md)

				# Now trigger a callback extraction if given
				if len(self.callback) > 0:
					self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
					submit_extraction(connector, host, secret_key, resource['id'], self.callback)
				else:
					callbacks = self.get_callbacks_by_sensor(sensor_type)
					if callbacks:
						for c in callbacks:
							self.log_info(resource, "Submitting callback extraction to %s" % c)
							submit_extraction(connector, host, secret_key, resource['id'], c)
					else:
						self.log_info(resource, "No default callback found for %s" % sensor_type)
			else:
				self.log_error(resource, "metadata.json not found in %s" % source_dir)

		else:
			self.log_error(resource, "%s could not be found" % source_dir)

		# TODO: Have extractor check for existence of Level_1 output product and delete if exists?

		self.end_message(resource)
Example #9
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example #10
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message()

		sensor_type, timestamp = resource['name'].split(" - ")
		targets = self.get_targets_by_sensor(sensor_type)
		source = self.get_source_by_sensor(sensor_type)
		existing_files = {}
		for t in targets:
			for f in resource['files']:
				if f['filename'].endswith(t):
					logging.getLogger(__name__).info("Found %s" % f['filename'])
					existing_files[t] = f['filename']
					break

		if len(existing_files) == len(targets):
			logging.getLogger(__name__).info("Target files already exist")

			# If there are bin2tif files previously created, are they valid?
			dsmd = download_metadata(connector, host, secret_key, resource['id'])
			for md in dsmd:
				if 'extractor_id' in md['agent'] and md['agent']['extractor_id'].endswith(source):
					# Found bin2tif metadata - are previously created files valid?
					logging.getLogger(__name__).info("Found metadata from %s" % source)
					for url in md['content']['files_created']:
						fid = url.split("/")[-1]
						i = download_info(connector, host, secret_key, fid)
						i = self.remapMountPath(connector, i['filepath'])
						logging.getLogger(__name__).info("Checking validity of %s" % i)
						if not os.path.isfile(i):
							# Found invalid file - nuke the entire site from orbit
							logging.getLogger(__name__).info("Invalid; deleting metadata")
							self.delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'], source)

							# Now trigger a callback extraction if given
							if len(self.callback) > 0:
								logging.getLogger(__name__).info("Submitting callback extraction to %s" % self.callback)
								submit_extraction(connector, host, secret_key, resource['id'], self.callback)
							else:
								callbacks = self.get_callbacks_by_sensor(sensor_type)
								if callbacks:
									for c in callbacks:
										logging.getLogger(__name__).info("Submitting callback extraction to %s" % c)
										submit_extraction(connector, host, secret_key, resource['id'], c)
								else:
									logging.getLogger(__name__).info("No default callback found for %s" % sensor_type)

							break

		else:
			# Search for target source files
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
			source_dir = self.remapMountPath(connector, source_dir)
			if sensor_type == "scanner3DTop":
				source_dir = source_dir.replace("Level_1", "raw_data")

			logging.getLogger(__name__).info("Searching for target files in %s" % source_dir)

			if os.path.isdir(source_dir):
				targ_files = {}
				for f in os.listdir(source_dir):
					for t in targets:
						if f.endswith(t):
							targ_files[t] = os.path.join(source_dir, f)
							break

				if targ_files != {}:
					for t in targ_files:
						logging.getLogger(__name__).info("Uploading %s" % (targ_files[t]))
						upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, resource['id'], targ_files[t])

					# Now trigger a callback extraction if given
					if len(self.callback) > 0:
						logging.getLogger(__name__).info("Submitting callback extraction to %s" % self.callback)
						submit_extraction(connector, host, secret_key, resource['id'], self.callback)
					else:
						callbacks = self.get_callbacks_by_sensor(sensor_type)
						if callbacks:
							for c in callbacks:
								logging.getLogger(__name__).info("Submitting callback extraction to %s" % c)
								submit_extraction(connector, host, secret_key, resource['id'], c)
						else:
							logging.getLogger(__name__).info("No default callback found for %s" % sensor_type)
				else:
					logging.getLogger(__name__).error("targets not found in %s" % source_dir)

			else:
				logging.getLogger(__name__).info("%s could not be found" % source_dir)
                        "@type": "cat:user",
                        "user_id":
                        "%sapi/users/%s" % (clowder_host, clowder_uid)
                    }
                }
                if not dry_run:
                    file_id = upload_to_dataset(conn, clowder_host,
                                                clowder_user, clowder_pass,
                                                snap_dataset, img_path)
                    logger.debug("Created file %s [%s]" % (img_file, file_id))
                    file_md["file_id"] = file_id
                    upload_file_metadata(conn, clowder_host, clowder_key,
                                         file_id, file_md)
                    logger.debug("Uploaded metadata to [%s]" % file_id)
                else:
                    logger.debug("Skipping file %s [%s]" %
                                 (img_file, "DRY RUN"))

            # Submit new dataset for extraction to plantCV extractor
            if not dry_run:
                extractor = "terra.lemnatec.plantcv"
                logger.debug("Submitting dataset [%s] to %s" %
                             (snap_dataset, extractor))
                submit_extraction(conn, clowder_host, clowder_key,
                                  snap_dataset, extractor)

    logger.debug("Experiment uploading complete.")

else:
    logger.debug("%s does not exist" % experiment_root)
    sys.exit(1)
CONN = Connector(None, mounted_paths={"/home/clowder/sites": args.sites})

logging.info("attempting to parse %s" % args.input)
sess = requests.Session()

if args.daily:
    seen_days = []
with open(args.input, 'r') as csv:
    i = 0
    for line in csv:
        ds_id, ds_name = line.replace("\n", "").split(",")
        if len(ds_id) > 0:
            if args.daily:
                day = ds_name.split(" - ")[1].split("__")[0]
                if day in seen_days:
                    continue
                else:
                    seen_days.append(day)
            try:
                submit_extraction(CONN, args.host, args.key, ds_id,
                                  args.extractor)
            except Exception as e:
                logging.info("failed to submit %s [%s]" % (ds_id, e))
        i += 1
        if (i % 1000 == 0):
            logging.info("submitted %s files" % i)
        if args.test:
            logging.info("submitted %s" % ds_id)
            break
logging.info("processing completed")