Code Example #1
def upload_ds(conn, host, key, sensor, date, timestamp, ds_files, ds_meta):
    if len(ds_files) > 0:
        year, month, dd = date.split("-")
        if DRY_RUN:
            log("[%s] %s files" % (sensor + ' - ' + timestamp, len(ds_files)))
            return

        if TIMESTAMP_FOLDER:
            dataset_id = build_dataset_hierarchy(CLOWDER_HOST, CLOWDER_KEY, CLOWDER_USER, CLOWDER_PASS,
                                                 SPACE_ID, sensor, year, month, dd,
                                                 sensor + ' - ' + timestamp)
        else:
            dataset_id = build_dataset_hierarchy(CLOWDER_HOST, CLOWDER_KEY, CLOWDER_USER, CLOWDER_PASS,
                                                 SPACE_ID, sensor, year, month,
                                                 leaf_ds_name=sensor + ' - ' + date)

        log("adding files to Clowder dataset %s" % dataset_id)

        for filepath in ds_files:
            upload_to_dataset(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id, filepath)
        if len(ds_meta.keys()) > 0:
            log("adding metadata to Clowder dataset %s" % dataset_id)
            format_md = {
                "@context": [
                    "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                    {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}
                ],
                "content": ds_meta,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/58e2a7b9fe3ae3efc1632ae8"
                }
            }
            upload_metadata(CONN, CLOWDER_HOST, CLOWDER_KEY, dataset_id,
                            format_md)
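
A note on the pattern: every example in this collection follows the same three steps — find or create the Clowder collection/dataset hierarchy, upload the output file(s) into that dataset, then attach metadata. Below is a minimal sketch of that pattern using the host/key/user/password variant of build_dataset_hierarchy shown in this example; the import paths, connection setup, and all placeholder values are assumptions rather than part of the original code.

# A minimal sketch of the shared hierarchy -> upload -> metadata pattern.
# Assumptions: the import paths below and the placeholder host/key/user/space values.
from pyclowder.connectors import Connector
from pyclowder.datasets import upload_metadata
from terrautils.extractors import build_dataset_hierarchy, build_metadata, upload_to_dataset

CLOWDER_HOST = "https://terraref.ncsa.illinois.edu/clowder/"  # placeholder
CLOWDER_KEY = "SECRET_KEY"                                    # placeholder
CLOWDER_USER = "user@example.com"                             # placeholder
CLOWDER_PASS = "password"                                     # placeholder
SPACE_ID = "SPACE_ID"                                         # placeholder


def upload_one_file(conn, sensor, date, filepath, extractor_info):
    year, month, day = date.split("-")
    # 1. Find or create the Space > sensor > year > month > day dataset hierarchy
    dataset_id = build_dataset_hierarchy(CLOWDER_HOST, CLOWDER_KEY, CLOWDER_USER, CLOWDER_PASS,
                                         SPACE_ID, sensor, year, month, day,
                                         leaf_ds_name=sensor + ' - ' + date)
    # 2. Upload the file into that dataset
    file_id = upload_to_dataset(conn, CLOWDER_HOST, CLOWDER_KEY, dataset_id, filepath)
    # 3. Record what was created as dataset-level metadata
    md = build_metadata(CLOWDER_HOST, extractor_info, dataset_id,
                        {"files_created": [file_id]}, 'dataset')
    upload_metadata(conn, CLOWDER_HOST, CLOWDER_KEY, dataset_id, md)
    return file_id

# Usage (placeholder values; extractor_info is a dict like those shown in Code Example #9):
#   conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})
#   upload_one_file(conn, "flirIrCamera", "2017-06-04", "/path/to/output.tif", extractor_info)
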
Code Example #2
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        inPath = resource['local_paths'][0]

        # Determine output file path
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        out_file = self.create_sensor_path(timestamp,
                                           opts=['extracted_values'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            connector,
            host,
            secret_key,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[:7],
            timestamp[:10],
            leaf_ds_name=resource['dataset_info']['name'])

        # Extract NDVI values
        if not os.path.isfile(out_file) or self.overwrite:
            logging.info("...writing values to: %s" % out_file)
            with open(inPath, "rb") as bin_file:
                data = bin_file.read()
            values = float(data[49:66])
            with open(out_file, 'wb') as csvfile:
                fields = ['file_name', 'NDVI']  # fields name for csv file
                wr = csv.DictWriter(csvfile,
                                    fieldnames=fields,
                                    lineterminator='\n')
                wr.writeheader()
                wr.writerow({'file_name': resource['name'], 'NDVI': values})

            # TODO: Send this to geostreams

            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, out_file)
            uploaded_file_ids.append(fileid)

            self.created += 1
            self.bytes += os.path.getsize(out_file)
        else:
            logging.info("%s already exists; skipping %s" %
                         (out_file, resource['id']))

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, metadata)

        self.end_message()
Code Example #3
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message()

        for p in resource['local_paths']:
            if p.endswith(".bin"):
                input_dir = p.replace(os.path.basename(p), '')
                # TODO: Eventually light may be in separate location
                input_dir_light = input_dir

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        out_name_base = self.sensors.create_sensor_path(timestamp, ext='')
        uploaded_file_ids = []

        subprocess.call(["octave --eval \"PSII(\'%s\',\'%s\' ,\'%s\')\"" %
                         (input_dir, input_dir_light, out_name_base)],shell=True);

        target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                              self.sensors.get_display_name(), timestamp[:4], timestamp[:7],
                                              timestamp[:10], leaf_ds_name=resource['dataset_info']['name'])

        for out_file in ["_Fm_dark", "_Fv_dark", "_FvFm_dark", "_Fm_light", "_Fv_light", "_FvFm_light",
                         "_Phi_PSII", "_NPQ", "_qN", "_qP", "_Rfd"]:
            full_out_name = out_name_base + out_file + ".png"
            if os.path.isfile(full_out_name):
                if full_out_name not in resource["local_paths"]:
                    fileid = upload_to_dataset(connector, host, secret_key, target_dsid, full_out_name)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(full_out_name)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message()
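
A side note on the Octave invocation above: the sketch below shows the same call made with an argument list instead of a formatted shell string. It assumes the PSII() Octave function is on Octave's path and that the three directory paths need no shell expansion; it is an illustrative alternative, not the extractor's actual code.

# A sketch of calling the Octave PSII() routine without shell=True
# (assumption: none of the paths require shell quoting or expansion).
import subprocess


def run_psii(input_dir, input_dir_light, out_name_base):
    cmd = ["octave", "--eval",
           "PSII('%s','%s','%s')" % (input_dir, input_dir_light, out_name_base)]
    # Returns the Octave process exit code; non-zero means the script failed.
    return subprocess.call(cmd)
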
Code Example #4
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message()

		# Find input files in dataset
		target_files = {
			'raw': None,
			'raw.hdr': None,
			'image.jpg': None,
			'frameIndex.txt': None,
			'settings.txt': None,
			"_metadata.json": None
		}

		metafile = None
		ds_metafile = None
		last_path = None
		path_match = None
		tempdir = None
		symlinks = []
		for f in resource['local_paths']:
			for fileExt in target_files.keys():
				if f.endswith(fileExt):
					if fileExt != '_metadata.json':
						filedir = os.path.dirname(f)
						if not last_path:
							last_path = filedir
						else:
							if filedir != last_path:
								path_match = False
							last_path = filedir
						target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
					else:
						if f.endswith('/_dataset_metadata.json'):
							ds_metafile = f
						elif not f.endswith('/_metadata.json'):
							metafile = f
							target_files['_metadata.json'] = {'filename': os.path.basename(metafile),
															  'path': metafile}

		# Identify md file either with other dataset files, or attached to Clowder dataset
		if metafile is None:
			if ds_metafile is not None:
				# Found dataset metadata, so check for the .json file alongside other files
				logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
				ds_dir = os.path.dirname(target_files['raw']['path'])
				for ds_f in os.listdir(ds_dir):
					if ds_f.endswith("_metadata.json"):
						target_files['_metadata.json'] = {'filename': ds_f, 'path': os.path.join(ds_dir, ds_f)}
			else:
				raise ValueError('could not locate metadata for %s' % resource['id'])

		# Create symlinks in one directory if inputs aren't in the same one
		if not path_match:
			tempdir = tempfile.mkdtemp()
			for f in target_files.keys():
				currf = target_files[f]
				if currf['filename'] == '_dataset_metadata.json':
					# Open the temporary file and change the JSON content format
					with open(currf['path'], 'r') as mdfile:
						jsondata = json.load(mdfile)
					md = get_terraref_metadata(jsondata)
					with open(currf['path'], 'w') as mdfile:
						json.dump(md, mdfile)
					newf = os.path.join(tempdir, target_files['raw']['filename'].replace("_raw","")+'_metadata.json')
				else:
					newf = os.path.join(tempdir, currf['filename'])
				os.symlink(currf['path'], newf)
				symlinks.append(newf)


		# Adjust sensor path based on VNIR vs SWIR and check for soil mask
		timestamp = resource['dataset_info']['name'].split(" - ")[1]
		if resource['dataset_info']['name'].find("SWIR") > -1:
			sensor_fullname = 'swir_netcdf'
			soil_mask = None
		else:
			sensor_fullname = 'vnir_netcdf'
			# Check for corresponding soil mask to include in workflow.sh if available
			soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
		outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

		# Invoke hyperspectral_workflow.sh
		logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
		# TODO: Move this
		script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
		if soil_mask:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										  "-m", soil_mask, "--new_clb_mth",
										  "-i", target_files['raw']['path'], "-o", outFilePath])
		else:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										 "--new_clb_mth",
										 "-i", target_files['raw']['path'], "-o", outFilePath])

		# Verify outfile exists and upload to clowder
		logging.getLogger(__name__).info('done creating output file (%s)' % (returncode))
		if returncode != 0:
			raise ValueError('script encountered an error')
		if os.path.exists(outFilePath):
			if returncode == 0:
				if outFilePath not in resource['local_paths']:
					target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
														  self.sensors.get_display_name(sensor=sensor_fullname),
														  timestamp[:4], timestamp[:7], timestamp[:10],
														  leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname)+' - '+timestamp)

					logging.getLogger(__name__).info('uploading %s' % outFilePath)
					upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)
				self.created += 1
				self.bytes += os.path.getsize(outFilePath)
		else:
			logging.getLogger(__name__).error('no output file was produced')

		# Send indices to betyDB
		ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
		with Dataset(ind_file, "r") as netCDF_handle:
			ndvi = netCDF_handle.get_variables_by_attributes(
					standard_name='normalized_difference_chlorophyll_index_750_705')
			NDVI705 = ndvi[0].getValue().ravel()[0]

			# TODO: Create CSV using ndviVal as primary key
			tmp_csv = 'traits.csv'
			plot_no = 'Full Field'
			csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
						 'citation_author,citation_year,citation_title,method'
			csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
					   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
							timestamp, NDVI705, plot_no)
			with open(tmp_csv, 'w') as c:
				c.write(csv_header+'\n'+csv_vals)

		submit_traits(tmp_csv, bety_key=self.bety_key)

		# Remove symlinks and temp directory
		for sym in symlinks:
			os.remove(sym)
		if tempdir:
			os.rmdir(tempdir)

		self.end_message()
Code Example #5
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, metadata = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, metadata]:
            self.log_error(
                resource, "could not locate each of left+right+metadata in processing")
            raise ValueError(
                "could not locate each of left+right+metadata in processing")

        # Determine output location & filenames
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image shapes & gps bounds")
        left_shape = bin2tiff.get_image_shape(metadata, 'left')
        right_shape = bin2tiff.get_image_shape(metadata, 'right')
        left_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['left']['bounding_box'])
        right_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['right']['bounding_box'])
        out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                    resource['id'].encode('utf8'))

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        if (not os.path.isfile(left_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % left_tiff)
            left_image = bin2tiff.process_image(left_shape, img_left, None)
            # Rename output.tif after creation to avoid long path errors
            create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            # TODO: we're moving zero byte files
            shutil.move(out_tmp_tiff, left_tiff)
            if left_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           left_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)

        if (not os.path.isfile(right_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % right_tiff)
            right_image = bin2tiff.process_image(right_shape, img_right, None)
            create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, right_tiff)
            if right_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        self.log_info(resource, "uploading LemnaTec metadata")
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message(resource)
Code Example #6
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get bin files and metadata
        metadata = None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, "ps2Top")
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
                metadata = load_json_file(f)
        frames = {}
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            for f in resource['local_paths']:
                if f.endswith(format_ind+'.bin'):
                    frames[ind] = f
        if metadata is None or len(frames) < 101:
            self.log_error(resource, 'could not find all frames and metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
        coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)

        (img_width, img_height) = self.get_image_dimensions(metadata)
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])

        self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

        png_frames = {}
        # skip 0101.bin since 101 is an XML file that lists the frame times
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
            tif_path = png_path.replace(".png", ".tif")
            png_frames[ind] = png_path
            if not os.path.exists(png_path) or self.overwrite:
                self.log_info(resource, "generating and uploading %s" % png_path)
                pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
                create_image(pixels, png_path)
                create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)

                if png_path not in resource['local_paths']:
                    fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(png_path)

        # Generate aggregate outputs
        self.log_info(resource, "generating aggregates")
        if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
            # TODO: Coerce histogram and pseudocolor to geotiff?
            self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
            self.created += 2
            self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
        if hist_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
            uploaded_file_ids.append(fileid)
        if coloredImg_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
            uploaded_file_ids.append(fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid, {
                                  "files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message(resource)
Code Example #7
def notifyClowderOfCompletedTask(task):
    # Verify that globus user has a mapping to clowder credentials in config file
    globUser = task['user']
    userMap = config['clowder']['user_map']

    if globUser in userMap:
        logger.info("%s task complete; notifying Clowder" % task['globus_id'],
                    extra={
                        "globus_id": task['globus_id'],
                        "action": "NOTIFYING CLOWDER OF COMPLETION"
                    })
        clowder_host = config['clowder']['host']
        clowder_key = config['clowder']['secret_key']
        clowder_user = userMap[globUser]['clowder_user']
        clowder_pass = userMap[globUser]['clowder_pass']
        clowder_id = userMap[globUser]['clowder_id']
        clowder_context = userMap[globUser]['context']

        sess = requests.Session()
        sess.auth = (clowder_user, clowder_pass)

        # Response can be OK, RETRY or ERROR
        response = "OK"

        # Prepare upload object with all file(s) found
        updatedTask = safeCopy(task)

        space_id = task['contents'].get('space_id', config['clowder']['primary_space'])
        for ds in task['contents']:
            # Skip any unexpected files at root level, e.g.
            #   /home/clowder/sites/ua-mac/raw_data/GetFluorescenceValues.m
            #   /home/clowder/sites/ua-mac/raw_data/irrigation/2017-06-04/@Recycle/flowmetertotals_March-2017.csv",
            if ds in ["LemnaTec - MovingSensor"] or ds.find("@Recycle") > -1:
                continue

            filesQueued = []
            fileFormData = []
            datasetMD = None
            datasetMDFile = False
            lastFile = None
            lastFileKey = None
            sensorname = ds.split(" - ")[0]

            logger.info("%s -- Processing [%s]" % (task['globus_id'], ds))

            # Assign dataset-level metadata if provided
            if "md" in task['contents'][ds]:
                datasetMD = task['contents'][ds]['md']

            # Add local files to dataset by path
            if 'files' in task['contents'][ds]:
                for fkey in task['contents'][ds]['files']:
                    fobj = task['contents'][ds]['files'][fkey]
                    if 'clowder_id' not in fobj or fobj['clowder_id'] == "":
                        if os.path.exists(fobj['path']):
                            if fobj['name'].find("metadata.json") == -1:
                                if 'md' in fobj:
                                    # Use [1,-1] to avoid json.dumps wrapping quotes
                                    # Replace \" with " to avoid json.dumps escaping quotes
                                    mdstr = ', "md":' + json.dumps(
                                        fobj['md'])[1:-1].replace('\\"', '"')
                                else:
                                    mdstr = ""
                                filesQueued.append((fobj['path'], mdstr))
                                lastFile = fobj['name']
                                lastFileKey = fkey
                            else:
                                try:
                                    datasetMD = loadJsonFile(fobj['path'])
                                    datasetMDFile = fkey
                                except:
                                    logger.error(
                                        "[%s] could not decode JSON from %s" %
                                        (ds, fobj['path']))
                                    updatedTask['contents'][ds]['files'][fkey][
                                        'clowder_id'] = "FILE NOT FOUND"
                                    updatedTask['contents'][ds]['files'][fkey][
                                        'error'] = "Failed to load JSON"
                                    writeTaskToDatabase(updatedTask)
                                    if response == "OK":
                                        response = "ERROR"  # Don't overwrite a RETRY
                        else:
                            logger.error("[%s] file not found: %s" %
                                         (ds, fobj['path']))
                            updatedTask['contents'][ds]['files'][fkey][
                                'clowder_id'] = "FILE NOT FOUND"
                            updatedTask['contents'][ds]['files'][fkey][
                                'error'] = "File not found"
                            writeTaskToDatabase(updatedTask)
                            if response == "OK":
                                response = "ERROR"  # Don't overwrite a RETRY

            if len(filesQueued) > 0 or datasetMD:
                # Try to clean metadata first
                if datasetMD:
                    cleaned_dsmd = None
                    try:
                        cleaned_dsmd = clean_metadata(datasetMD, sensorname)
                    except Exception as e:
                        logger.error("[%s] could not clean md: %s" %
                                     (ds, str(e)))
                        task['contents'][ds][
                            'error'] = "Could not clean metadata: %s" % str(e)
                        # TODO: possible this could be recoverable with more info from clean_metadata
                        if response == "OK":
                            response = "ERROR"  # Don't overwrite a RETRY

                if ds.find(" - ") > -1:
                    # e.g. "co2Sensor - 2016-12-25" or "VNIR - 2016-12-25__12-32-42-123"
                    c_sensor = ds.split(" - ")[0]
                    c_date = ds.split(" - ")[1]
                    c_year = c_date.split('-')[0]
                    c_month = c_date.split('-')[1]
                    if c_date.find("__") == -1:
                        # If we only have a date and not a timestamp, don't create date collection
                        c_date = None
                    else:
                        c_date = c_date.split("__")[0].split("-")[2]
                else:
                    c_sensor, c_date, c_year, c_month = ds, None, None, None

                # Get dataset from clowder, or create & associate with collections
                try:
                    hierarchy_host = clowder_host + (
                        "/" if not clowder_host.endswith("/") else "")
                    dsid = build_dataset_hierarchy(hierarchy_host, clowder_key,
                                                   clowder_user, clowder_pass,
                                                   space_id, c_sensor, c_year,
                                                   c_month, c_date, ds)
                    logger.info("   [%s] id: %s" % (ds, dsid))
                except Exception as e:
                    logger.error("[%s] could not build hierarchy: %s" %
                                 (ds, str(e)))
                    task['contents'][ds][
                        'retry'] = "Could not build dataset hierarchy: %s" % str(
                            e)
                    response = "RETRY"
                    continue

                if dsid:
                    dsFileList = fetchDatasetFileList(dsid, sess)
                    # Only send files not already present in dataset by path
                    for queued in filesQueued:
                        alreadyStored = False
                        for storedFile in dsFileList:
                            if queued[0] == storedFile['filepath']:
                                logger.info(
                                    "   skipping file %s (already uploaded)" %
                                    queued[0])
                                alreadyStored = True
                                break
                        if not alreadyStored:
                            fileFormData.append(
                                ("file",
                                 '{"path":"%s"%s}' % (queued[0], queued[1])))

                    if datasetMD and cleaned_dsmd:
                        # Check for existing metadata from the site user
                        alreadyAttached = False
                        md_existing = download_metadata(
                            None, hierarchy_host, clowder_key, dsid)
                        for mdobj in md_existing:
                            if 'agent' in mdobj and 'user_id' in mdobj['agent']:
                                if mdobj['agent']['user_id'] == \
                                        "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id:
                                    logger.info(
                                        "   skipping metadata (already attached)"
                                    )
                                    alreadyAttached = True
                                    break
                        if not alreadyAttached:
                            md = {
                                "@context": [
                                    "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                                    {"@vocab": clowder_context}
                                ],
                                "content": cleaned_dsmd,
                                "agent": {
                                    "@type": "cat:user",
                                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id
                                }
                            }

                            dsmd = sess.post(
                                clowder_host + "/api/datasets/" + dsid +
                                "/metadata.jsonld",
                                headers={'Content-Type': 'application/json'},
                                data=json.dumps(md))

                            if dsmd.status_code in [500, 502, 504]:
                                logger.error(
                                    "[%s] failed to attach metadata (%s: %s)" %
                                    (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][
                                    datasetMDFile]['retry'] = "%s: %s" % (
                                        dsmd.status_code, dsmd.text)
                                response = "RETRY"
                            elif dsmd.status_code != 200:
                                logger.error(
                                    "[%s] failed to attach metadata (%s: %s)" %
                                    (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][
                                    datasetMDFile]['error'] = "%s: %s" % (
                                        dsmd.status_code, dsmd.text)
                                response = "ERROR"
                            else:
                                if datasetMDFile:
                                    logger.info(
                                        "   [%s] added metadata from .json file"
                                        % ds,
                                        extra={
                                            "dataset_name": ds,
                                            "dataset_id": dsid,
                                            "action": "METADATA ADDED",
                                            "metadata": datasetMD
                                        })
                                    updatedTask['contents'][ds]['files'][
                                        datasetMDFile][
                                            'metadata_loaded'] = True
                                    updatedTask['contents'][ds]['files'][
                                        datasetMDFile][
                                            'clowder_id'] = "attached to dataset"
                                    writeTaskToDatabase(updatedTask)
                                else:
                                    # Remove metadata from activeTasks on success even if file upload fails in next step, so we don't repeat md
                                    logger.info("   [%s] added metadata" % ds,
                                                extra={
                                                    "dataset_name": ds,
                                                    "dataset_id": dsid,
                                                    "action": "METADATA ADDED",
                                                    "metadata": datasetMD
                                                })
                                    del updatedTask['contents'][ds]['md']
                                    writeTaskToDatabase(updatedTask)

                    if len(fileFormData) > 0:
                        # Upload collected files for this dataset
                        # Boundary encoding from http://stackoverflow.com/questions/17982741/python-using-reuests-library-for-multipart-form-data
                        logger.info("   [%s] uploading unprocessed files" % ds,
                                    extra={
                                        "dataset_id": dsid,
                                        "dataset_name": ds,
                                        "action": "UPLOADING FILES",
                                        "filelist": fileFormData
                                    })

                        (content,
                         header) = encode_multipart_formdata(fileFormData)
                        fi = sess.post(clowder_host + "/api/uploadToDataset/" +
                                       dsid,
                                       headers={'Content-Type': header},
                                       data=content)

                        if fi.status_code in [104, 500, 502, 504]:
                            logger.error(
                                "[%s] failed to attach files (%s: %s)" %
                                (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][
                                datasetMDFile]['retry'] = "%s: %s" % (
                                    fi.status_code, fi.text)
                            response = "RETRY"
                        elif fi.status_code != 200:
                            logger.error(
                                "[%s] failed to attach files (%s: %s)" %
                                (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][
                                datasetMDFile]['error'] = "%s: %s" % (
                                    fi.status_code, fi.text)
                            response = "ERROR"
                        else:
                            loaded = fi.json()
                            if 'ids' in loaded:
                                for fobj in loaded['ids']:
                                    logger.info("   [%s] added file %s" %
                                                (ds, fobj['name']))
                                    for fkey in updatedTask['contents'][ds]['files']:
                                        fmeta = updatedTask['contents'][ds]['files'][fkey]
                                        if fmeta['name'] == fobj['name']:
                                            fmeta['clowder_id'] = fobj['id']
                                            # remove any previous retry/error messages
                                            fmeta.pop('retry', None)
                                            fmeta.pop('error', None)
                                            break
                                    writeTaskToDatabase(updatedTask)
                            else:
                                logger.info("   [%s] added file %s" %
                                            (ds, lastFile))
                                fmeta = updatedTask['contents'][ds]['files'][lastFileKey]
                                fmeta['clowder_id'] = loaded['id']
                                # remove any previous retry/error messages
                                fmeta.pop('retry', None)
                                fmeta.pop('error', None)
                                writeTaskToDatabase(updatedTask)

        return response
    else:
        logger.error("%s task: no credentials for Globus user %s" %
                     (task['globus_id'], globUser))
        return "ERROR"
Code Example #8
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        if type(parameters) is str:
            parameters = json.loads(parameters)
        if 'parameters' in parameters:
            parameters = parameters['parameters']
        if type(parameters) is unicode:
            parameters = json.loads(str(parameters))

        # Input path will suggest which sensor we are seeing
        sensor_type = None
        for f in resource['files']:
            filepath = f['filepath']
            for sens in ["rgb_geotiff", "ir_geotiff", "laser3d_heightmap"]:
                if filepath.find(sens) > -1:
                    sensor_type = sens.split("_")[0]
                    break
            if sensor_type is not None:
                break

        # dataset_name = "Full Field - 2017-01-01"
        dataset_name = parameters["output_dataset"]
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""
        timestamp = dataset_name.split(" - ")[1]

        out_tif_full = self.sensors.create_sensor_path(
            timestamp, opts=[sensor_type, scan_name])
        out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
        out_vrt = out_tif_full.replace(".tif", ".vrt")
        out_dir = os.path.dirname(out_vrt)

        if os.path.exists(out_vrt) and not self.overwrite:
            self.log_skip(resource,
                          "%s already exists; ending process" % out_vrt)
            return

        if not self.darker or sensor_type != 'rgb':
            (nu_created, nu_bytes) = self.generateSingleMosaic(
                connector, host, secret_key, sensor_type, out_dir, out_vrt,
                out_tif_thumb, out_tif_full, parameters, resource)
        else:
            (nu_created, nu_bytes) = self.generateDarkerMosaic(
                connector, host, secret_key, sensor_type, out_dir, out_vrt,
                out_tif_thumb, out_tif_full, parameters, resource)
        self.created += nu_created
        self.bytes += nu_bytes

        # Get dataset ID or create it, creating parent collections as needed
        target_dsid = build_dataset_hierarchy(host,
                                              secret_key,
                                              self.clowder_user,
                                              self.clowder_pass,
                                              self.clowderspace,
                                              self.sensors.get_display_name(),
                                              timestamp[:4],
                                              timestamp[5:7],
                                              leaf_ds_name=dataset_name)

        # Upload full field image to Clowder
        content = {
            "comment":
            "This stitched image is computed based on an assumption that the scene is planar. \
                There are likely to be small offsets near the boundary of two images anytime there are plants \
                at the boundary (because those plants are higher than the ground plane), or where the dirt is \
                slightly higher or lower than average.",
            "file_ids": parameters["file_paths"]
        }

        if os.path.exists(out_tif_thumb):
            thumbid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, target_dsid,
                                        out_tif_thumb)
            thumbmeta = build_metadata(host, self.extractor_info, thumbid,
                                       content, 'file')
            upload_metadata(connector, host, secret_key, thumbid, thumbmeta)

        if os.path.exists(out_tif_full):
            fullid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       out_tif_full)
            fullmeta = build_metadata(host, self.extractor_info, fullid,
                                      content, 'file')
            upload_metadata(connector, host, secret_key, fullid, fullmeta)

        self.end_message(resource)
Code Example #9
def upload_to_clowder(dir, type, scan):
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    if args.type == "rgb_geotiff":
        print("Submission of RGB GeoTIFF would happen now")
        return

        disp = "RGB GeoTIFFs"
        timestamp = dir.split("/")[-2]
        target_dsid = build_dataset_hierarchy(host, secret_key, clow_user, clow_pass, clowspace, disp,
                                              timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=disp+' - '+timestamp)

        output_ids = {}
        # First, upload actual files
        for targ_file in ["rgb_geotiff_L1_ua-mac_%s_left.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_right.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_nrmac_left.tif" % ts,
                          "rgb_geotiff_L1_ua-mac_%s_nrmac_right.tif" % ts]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Second, upload metadata
        ds_md = os.path.join(dir, "clean_metadata.json")
        if os.path.isfile(ds_md):
            # Dataset metadata
            extractor_info = {
                "extractor_name": "terra.stereo-rgb.bin2tif",
                "extractor_version": "1.1",
                "extractor_author": "Max Burnette <*****@*****.**>",
                "extractor_description": "Stereo RGB Image Bin to GeoTIFF Converter",
                "extractor_repo": "https://github.com/terraref/extractors-stereo-rgb.git"
            }

            with open(ds_md, 'r') as contents:
                jmd = json.load(contents)
            upload_dataset_metadata(conn, host, secret_key, clowder_id, jmd)
            lemna_md = build_metadata(host, extractor_info, target_dsid, jmd, 'dataset')
            upload_metadata(conn, host, secret_key, target_dsid, lemna_md)

        nrmac_md = os.path.join(dir, "nrmac_scores.json")
        if os.path.isfile(nrmac_md):
            # NRMAC file metadata
            extractor_info = {
                "extractor_name": "terra.stereo-rgb.nrmac",
                "extractor_version": "1.0",
                "extractor_author": "Sidike Paheding <*****@*****.**>",
                "extractor_description": "Stereo RGB No-Reference Multiscale Autocorrelation",
                "extractor_repo": "https://github.com/terraref/quality-metrics.git"
            }

            with open(nrmac_md, 'r') as contents:
                jmd = json.load(contents)
            fi_id = output_ids["rgb_geotiff_L1_ua-mac_%s_nrmac_left.tif" % ts]
            ext_meta = build_metadata(host, extractor_info, fi_id, {
                "quality_score": jmd["quality_score"]["left"]
            }, 'file')
            upload_metadata(conn, host, secret_key, fi_id, ext_meta)
            fi_id = output_ids["rgb_geotiff_L1_ua-mac_%s_nrmac_right.tif" % ts]
            ext_meta = build_metadata(host, extractor_info, fi_id, {
                "quality_score": jmd["quality_score"]["right"]
            }, 'file')
            upload_metadata(conn, host, secret_key, fi_id, ext_meta)

        # Write output_ids.json
        with open(os.path.join(dir, "clowder_ids.json"), 'w') as js:
            js.write(json.dumps(output_ids))

    elif args.type == "fullfield":
        print("Submission of Full Field Mosaic would happen now")
        return

        disp = "Full Field Stitched Mosaics"
        timestamp = dir.split("/")[-2]
        target_dsid = build_dataset_hierarchy(host, secret_key, clow_user, clow_pass, clowspace, disp,
                                              timestamp[:4], timestamp[5:7], leaf_ds_name=disp+' - '+timestamp)

        # TODO: Can each scan be in a separate folder in Clowder?

        output_ids = {}
        # First, upload NRMAC files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s_nrmac.vrt" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_nrmac.tif" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Second, upload main stitched files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s.vrt" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_thumb.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_10pct.tif" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s.png" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Third, upload trait CSV files
        for targ_file in ["fullfield_L1_ua-mac_%s_%s_canopycover_bety.csv" % (day, scan),
                          "fullfield_L1_ua-mac_%s_%s_canopycover_geo.csv" % (day, scan)]:
            targ_path = os.path.join(dir, targ_file)
            if os.path.isfile(targ_path):
                file_id = upload_to_dataset(conn, host, clow_user, clow_pass, target_dsid, targ_path)
                output_ids[targ_file] = file_id

        # Write output_ids.json
        with open(os.path.join(dir, "clowder_ids.json"), 'w') as js:
            js.write(json.dumps(output_ids))
Code Example #10
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get bin files and metadata
        metadata = None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_extractor_metadata(all_dsmd)
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_metadata.json') and f.find(
                    '/_metadata.json') == -1 and metadata is None:
                metadata = load_json_file(f)
        frames = {}
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
            for f in resource['files']:
                if f['filename'].endswith(format_ind + '.bin'):
                    frames[ind] = f['filename']
        if metadata is None or len(frames) < 101:
            logging.error('could not find all frames and metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.create_sensor_path(timestamp,
                                                    opts=['combined_hist'])
        coloredImg_path = self.sensors.create_sensor_path(
            timestamp, opts=['combined_pseudocolored'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            connector,
            host,
            secret_key,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[:7],
            timestamp[:10],
            leaf_ds_name=resource['dataset_info']['name'])

        img_width = 1936
        img_height = 1216
        png_frames = {}
        # skip 0101.bin since 101 is an XML file that lists the frame times
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
            png_path = self.sensors.create_sensor_path(timestamp,
                                                       opts=[format_ind])
            png_frames[ind] = png_path
            if not os.path.exists(png_path) or self.overwrite:
                logging.info("...generating and uploading %s" % png_path)
                pixels = numpy.fromfile(frames[ind],
                                        numpy.dtype('uint8')).reshape(
                                            [img_height, img_width])
                create_image(pixels, png_path)
                if png_path not in resource['local_paths']:
                    fileid = upload_to_dataset(connector, host, secret_key,
                                               target_dsid, png_path)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(png_path)

        # Generate aggregate outputs
        logging.info("...generating aggregates")
        if not (os.path.exists(hist_path)
                and os.path.exists(coloredImg_path)) or self.overwrite:
            psiiCore.psii_analysis(png_frames, hist_path, coloredImg_path)
            self.created += 2
            self.bytes += os.path.getsize(hist_path)
            self.bytes += os.path.getsize(coloredImg_path)
        if hist_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, hist_path)
            uploaded_file_ids.append(fileid)
        if coloredImg_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key,
                                       target_dsid, coloredImg_path)
            uploaded_file_ids.append(fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message()
Code Example #11
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get BIN file and metadata
        bin_file, metadata = None, None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            # Otherwise, check for the raw IR .bin file
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, metadata]:
            logging.getLogger(__name__).error(
                'could not find both ir.bin and metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        png_path = self.sensors.create_sensor_path(timestamp, ext='png')
        tiff_path = self.sensors.create_sensor_path(timestamp)
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        skipped_png = False
        if not os.path.exists(png_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating %s" % png_path)
            # get raw data from bin file
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape(
                [480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            # Only upload the newly generated file to Clowder if it isn't already in dataset
            if png_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, png_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True

        if not os.path.exists(tiff_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating temperature matrix")
            gps_bounds = geojson_to_tuples(
                metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file,
                                          numpy.dtype('<u2')).reshape(
                                              [480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data,
                                                metadata)  # get temperature

            logging.getLogger(__name__).info("Creating %s" % tiff_path)
            # Rename temporary tif after creation to avoid long path errors
            out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                        resource['id'].encode('utf8'))
            create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True,
                           self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, tiff_path)
            if tiff_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, tiff_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message()
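
The raw FLIR frame is decoded twice in this example (once for the PNG and again when the PNG step was skipped). A small helper like the hedged sketch below, based on the 640x480 little-endian uint16 layout shown above, would avoid the duplication; the function name is an assumption.

# A sketch of the raw FLIR decode step used above (assumption: the 640x480
# little-endian uint16 frame layout and the same rotation).
import numpy


def read_flir_frame(bin_file):
    raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
    # Rotate by 90 degrees three times, matching the orientation used above.
    return numpy.rot90(raw_data, 3)
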