def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        with open(resource['local_paths'][0], 'r') as inputcsv:
            inputlines = inputcsv.readlines()

        # First check whether there is any data besides the header line
        if len(inputlines) <= 1:
            self.log_info(resource,
                          "no trait lines found in CSV; skipping upload")
        else:
            # submit CSV to BETY
            self.log_info(
                resource, "found %s trait lines; submitting CSV to bety" %
                str(len(inputlines) - 1))
            submit_traits(resource['local_paths'][0], betykey=self.bety_key)

            # Add metadata to original dataset indicating this was run
            self.log_info(resource,
                          "updating file metadata (%s)" % resource['id'])
            ext_meta = build_metadata(host, self.extractor_info,
                                      resource['id'], {}, 'file')
            upload_metadata(connector, host, secret_key, resource['id'],
                            ext_meta)

        self.end_message(resource)
def insert_height_traits_into_betydb(in_dir, out_dir, str_date,
                                     param_percentile, sensor_d, convt):

    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    hist_e, hist_w = analysis_utils.load_histogram_from_both_npy(in_dir)

    out_file = os.path.join(out_dir, str_date + '_height.csv')
    csv = open(out_file, 'w')

    (fields, traits) = analysis_utils.get_traits_table_height()

    csv.write(','.join(map(str, fields)) + '\n')

    for j in range(0, PLOT_COL_NUM * PLOT_RANGE_NUM):
        targetHist_e = hist_e[j, :]
        targetHist_w = hist_w[j, :]
        plotNum = j + 1
        if (targetHist_e.max() == 0) or (targetHist_w.max() == 0):
            continue
        else:
            targetHist_e = targetHist_e / np.sum(targetHist_e)
            quantiles_e = np.cumsum(targetHist_e)
            b = np.arange(len(quantiles_e))
            c = b[quantiles_e > param_percentile]
            quantile_e = min(c)

            targetHist_w = targetHist_w / np.sum(targetHist_w)
            quantiles_w = np.cumsum(targetHist_w)
            b = np.arange(len(quantiles_w))
            c = b[quantiles_w > param_percentile]
            quantile_w = min(c)

            estHeight = (quantile_e + quantile_w) / 2

            str_time = str_date + 'T12:00:00'
            traits['local_datetime'] = str_time
            traits['canopy_height'] = str(
                (B_F_SLOPE * float(estHeight) + B_F_OFFSET) / 100.0)
            traits['site'] = analysis_utils.parse_site_from_plotNum_1728(
                plotNum, convt)
            trait_list = analysis_utils.generate_traits_list_height(traits)
            csv.write(','.join(map(str, trait_list)) + '\n')

    csv.close()
    #submitToBety(out_file)
    betydb.submit_traits(out_file,
                         filetype='csv',
                         betykey=betydb.get_bety_key(),
                         betyurl=betydb.get_bety_url())

    return
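# A minimal, self-contained sketch of the percentile step used above: given a
# per-plot height histogram, normalize it, take the cumulative sum, and return
# the first bin whose cumulative frequency exceeds the requested percentile.
# The function name and toy data are illustrative, not part of the original module.
import numpy as np

def height_bin_at_percentile(histogram, percentile):
    # normalize counts to frequencies, then build the cumulative distribution
    frequencies = histogram / np.sum(histogram)
    cumulative = np.cumsum(frequencies)
    bins = np.arange(len(cumulative))
    # first bin index whose cumulative mass exceeds the percentile threshold
    return int(bins[cumulative > percentile].min())

# e.g. height_bin_at_percentile(np.array([0, 1, 2, 5, 9, 7, 4, 2, 1, 0]), 0.95) -> 7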
Example #3
def upload_to_bety(file, clowder_id):
    conn = Connector(
        None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    submit_traits(file, betykey=bety_key)

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.betydb",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "BETYdb CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(
        host, extractor_info, clowder_id, {
            "betydb_link":
            "https://terraref.ncsa.illinois.edu/bety/api/v1/variables?name=canopy_cover"
        }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
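# upload_to_bety() above relies on module-level globals (host, secret_key,
# bety_key) defined elsewhere in the original script. A hedged usage sketch
# with placeholder values; the host URL, keys, and file ID below are
# illustrative, not real credentials or endpoints.
host = "https://terraref.ncsa.illinois.edu/clowder/"  # assumed Clowder host
secret_key = "CLOWDER_SECRET_KEY"                     # placeholder
bety_key = "BETYDB_API_KEY"                           # placeholder

upload_to_bety("canopy_cover_traits.csv", "CLOWDER_FILE_ID")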
Example #4
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message()

		# Find input files in dataset
		target_files = {
			'raw': None,
			'raw.hdr': None,
			'image.jpg': None,
			'frameIndex.txt': None,
			'settings.txt': None,
			"_metadata.json": None
		}

		metafile = None
		ds_metafile = None
		last_path = None
		path_match = None
		tempdir = None
		symlinks = []
		for f in resource['local_paths']:
			for fileExt in target_files.keys():
				if f.endswith(fileExt):
					if fileExt != '_metadata.json':
						filedir = os.path.dirname(f)
						if not last_path:
							last_path = filedir
						else:
							if filedir != last_path:
								path_match = False
							last_path = filedir
						target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
					else:
						if f.endswith('/_dataset_metadata.json'):
							ds_metafile = f
						elif not f.endswith('/_metadata.json'):
							metafile = f
							target_files['_metadata.json'] = {'filename': os.path.basename(metafile),
															  'path': metafile}

		# Identify md file either with other dataset files, or attached to Clowder dataset
		if metafile is None:
			if ds_metafile is not None:
				# Found dataset metadata, so check for the .json file alongside other files
				logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
				ds_dir = os.path.dirname(target_files['raw']['path'])
				for ds_f in os.listdir(ds_dir):
					if ds_f.endswith("_metadata.json"):
						target_files['_metadata.json'] = {'filename': ds_f, 'path': os.path.join(ds_dir, ds_f)}
			else:
				raise ValueError('could not locate metadata for %s' % resource['id'])

		# Create symlinks in one directory if inputs aren't in the same one
		if not path_match:
			tempdir = tempfile.mkdtemp()
			for f in target_files.keys():
				currf = target_files[f]
				if currf['filename'] == '_dataset_metadata.json':
					# Open the temporary file and change the JSON content format
					with open(currf['path'], 'r') as mdfile:
						jsondata = json.load(mdfile)
					md = get_terraref_metadata(jsondata)
					with open(currf['path'], 'w') as mdfile:
						json.dump(md, mdfile)
					newf = os.path.join(tempdir, target_files['raw']['filename'].replace("_raw","")+'_metadata.json')
				else:
					newf = os.path.join(tempdir, currf['filename'])
				os.symlink(currf['path'], newf)
				symlinks.append(newf)


		# Adjust sensor path based on VNIR vs SWIR and check for soil mask
		timestamp = resource['dataset_info']['name'].split(" - ")[1]
		if resource['dataset_info']['name'].find("SWIR") > -1:
			sensor_fullname = 'swir_netcdf'
			soil_mask = None
		else:
			sensor_fullname = 'vnir_netcdf'
			# Check for corresponding soil mask to include in workflow.sh if available
			soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
		outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

		# Invoke hyperspectral_workflow.sh
		logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
		# TODO: Move this
		script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
		if soil_mask:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										  "-m", soil_mask, "--new_clb_mth",
										  "-i", target_files['raw']['path'], "-o", outFilePath])
		else:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										 "--new_clb_mth",
										 "-i", target_files['raw']['path'], "-o", outFilePath])

		# Verify outfile exists and upload to clowder
		logging.getLogger(__name__).info('hyperspectral_workflow.sh finished (return code %s)' % returncode)
		if returncode != 0:
			raise ValueError('script encountered an error')
		if os.path.exists(outFilePath):
			if returncode == 0:
				if outFilePath not in resource['local_paths']:
					target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
														  self.sensors.get_display_name(sensor=sensor_fullname),
														  timestamp[:4], timestamp[:7], timestamp[:10],
														  leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname)+' - '+timestamp)

					logging.getLogger(__name__).info('uploading %s' % outFilePath)
					upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)
				self.created += 1
				self.bytes += os.path.getsize(outFilePath)
		else:
			logging.getLogger(__name__).error('no output file was produced')

		# Send indices to betyDB
		ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
		with Dataset(ind_file, "r") as netCDF_handle:
			ndvi = netCDF_handle.get_variables_by_attributes(
					standard_name='normalized_difference_chlorophyll_index_750_705')
			NDVI705 = ndvi[0].getValue().ravel()[0]

			# TODO: Create CSV using ndviVal as primary key
			tmp_csv = 'traits.csv'
			plot_no = 'Full Field'
			csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
						 'citation_author,citation_year,citation_title,method'
			csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
					   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
							timestamp, NDVI705, plot_no)
			with open(tmp_csv, 'w') as c:
				c.write(csv_header+'\n'+csv_vals)

		submit_traits(tmp_csv, betykey=self.bety_key)

		# Remove symlinks and temp directory
		for sym in symlinks:
			os.remove(sym)
		if tempdir:
			os.rmdir(tempdir)

		self.end_message()
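# Sketch of the NDVI705 lookup performed above, isolated for clarity: open the
# "_ind" indices NetCDF file and pull the variable whose standard_name attribute
# matches the chlorophyll index, then unwrap the scalar value. The function name
# is illustrative; get_variables_by_attributes() and getValue() are standard
# netCDF4 calls.
from netCDF4 import Dataset

def read_ndvi705(ind_file):
    with Dataset(ind_file, "r") as nc:
        matches = nc.get_variables_by_attributes(
            standard_name='normalized_difference_chlorophyll_index_750_705')
        if not matches:
            raise ValueError("no NDVI705 variable found in %s" % ind_file)
        # scalar NetCDF variables come back as 0-d arrays; ravel()[0] unwraps them
        return matches[0].getValue().ravel()[0]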
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "canopycovertraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = ccCore.get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        logging.info(connector)
        logging.info(host)
        logging.info(secret_key)
        logging.info(resource)
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue
                ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5)
                ccVal *= 100.0  # Make 0-100 instead of 0-1
                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception:
                logging.error("error generating cc for %s" % plotname)
                continue

            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = ccCore.generate_traits_list(traits)

            csv_file.write(','.join(map(str, trait_list)) + '\n')

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source":
                host + ("" if host.endswith("/") else "/") + "files/" +
                resource['id'],
                "canopy_cover":
                ccVal
            }
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed":
                successful_plots,
                "plots_skipped":
                len(all_plots) - successful_plots,
                "betydb_link":
                "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
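# Note on the datapoint call above: centroid_from_geojson() returns a GeoJSON
# geometry whose "coordinates" are ordered [longitude, latitude], so the tuple
# is swapped to (lat, lon) before submission. A tiny illustration with a
# hand-written centroid (coordinate values are illustrative):
import json

centroid_geojson = '{"type": "Point", "coordinates": [-111.975, 33.075]}'
lonlat = json.loads(centroid_geojson)["coordinates"]
latlon = (lonlat[1], lonlat[0])  # -> (33.075, -111.975), i.e. (lat, lon)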
Example #6
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "meantemptraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        dsmd = download_metadata(connector, host, secret_key,
                                 resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                              bounds)
            #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature
            # Filter out invalid (negative) values, then take the NaN-aware mean in Celsius
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                #generate_csv(tmp_csv, fields, trait_list)
                csv_file.write(','.join(map(str, trait_list)) + '\n')

                # Prepare and submit datapoint
                centroid_lonlat = json.loads(
                    centroid_from_geojson(bounds))["coordinates"]
                time_fmt = timestamp + "T12:00:00-07:00"
                dpmetadata = {
                    "source":
                    host + ("" if host.endswith("/") else "/") + "files/" +
                    resource['id'],
                    "surface_temperature":
                    str(mean_tc)
                }
                create_datapoint_with_dependencies(
                    connector, host, secret_key, "IR Surface Temperature",
                    (centroid_lonlat[1], centroid_lonlat[0]), time_fmt,
                    time_fmt, dpmetadata, timestamp)

            successful_plots += 1

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed":
                successful_plots,
                "plots_skipped":
                len(all_plots) - successful_plots,
                "betydb_link":
                "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        metadata)

        self.end_message()
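# Isolated sketch of the per-plot temperature step above: mask invalid
# (negative) raw values as NaN, take the NaN-aware mean, and convert from
# Kelvin to Celsius. The array values are illustrative.
import numpy

pxarray = numpy.array([[295.2, 296.1, -1.0],
                       [294.8, -1.0, 295.5]])
pxarray[pxarray < 0] = numpy.nan
mean_tc = numpy.nanmean(pxarray) - 273.15  # Kelvin -> Celsius, ~22.25 here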
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue

                plot_img = create_image(pxarray, "plot_image.png")
                plot_csv = "plot.csv"
                self.generate_table_only(plot_img, plot_csv)
                trait_vals = self.extract_vals_from_csv(plot_csv)

                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception:
                logging.error("error generating traits for %s" % plotname)
                continue

            # Create BETY-ready CSV
            (fields, traits) = self.get_traits_table()
            for tr in trait_vals:
                traits[tr] = str(trait_vals[tr])
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12-00-00-000"
            trait_list = self.generate_traits_list(traits)
            self.generate_cc_csv(plot_csv, fields, trait_list)

            # submit CSV to BETY
            submit_traits(plot_csv, self.bety_key)

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + "files/" + resource['id'],
            }
            for tr in trait_vals:
                dpmetadata[tr] = str(trait_vals[tr])
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

            os.remove(plot_img)
            os.remove(plot_csv)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host,
            self.extractor_info,
            resource['parent']['id'],
            {
                "plots_processed": successful_plots,
                "plots_skipped": len(all_plots) - successful_plots
                # TODO: add link to BETY trait IDs
            },
            'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        (fields, traits) = pcia.get_traits_table()

        # get imgs paths, filter out the json paths
        img_paths = []
        for p in resource['local_paths']:
            if p[-4:] == '.jpg' or p[-4:] == '.png':
                img_paths.append(p)

            # Get metadata for avg_traits from file metadata
            elif p.endswith("_metadata.json"
                            ) and not p.endswith("_dataset_metadata.json"):
                with open(p) as ds_md:
                    md_set = json.load(ds_md)
                    for md in md_set:
                        if 'content' in md:
                            needed_fields = [
                                'plant_barcode', 'genotype', 'treatment',
                                'imagedate'
                            ]
                            for fld in needed_fields:
                                if traits[fld] == '' and fld in md['content']:
                                    if fld == 'imagedate':
                                        traits[fld] = md['content'][
                                            fld].replace(" ", "T")
                                    else:
                                        traits[fld] = md['content'][fld]

        # build list of file descriptor dictionaries with sensor info
        file_objs = []
        for f in resource['files']:
            found_info = False
            image_id = f['id']
            # Get from file metadata if possible
            file_md = download_file_metadata(connector, host, secret_key,
                                             f['id'])
            for md in file_md:
                if 'content' in md:
                    mdc = md['content']
                    if ('rotation_angle' in mdc) and ('perspective' in mdc) \
                            and ('camera_type' in mdc):
                        if 'experiment_id' in mdc:
                            found_info = True
                            # experiment ID determines what PlantCV code gets executed
                            experiment = mdc['experiment_id']
                            # perspective = 'side-view' / 'top-view'
                            perspective = mdc['perspective']
                            # angle = -1, 0, 90, 180, 270; set top-view angle to be -1 for later sorting
                            angle = mdc['rotation_angle'] if perspective != 'top-view' else -1
                            # camera_type = 'visible/RGB' / 'near-infrared'
                            camera_type = mdc['camera_type']

                            for pth in img_paths:
                                if re.findall(f['filename'], pth):
                                    file_objs.append({
                                        'perspective': perspective,
                                        'angle': angle,
                                        'camera_type': camera_type,
                                        'image_path': pth,
                                        'image_id': image_id,
                                        'experiment_id': experiment,
                                        'filename': f['filename']
                                    })

            if not found_info:
                # Get from filename if no metadata is found
                raw_name = re.findall(
                    r"(VIS|NIR|vis|nir)_(SV|TV|sv|tv)(_\d+)*", f["filename"])
                if raw_name:
                    raw_int = re.findall(r'\d+', raw_name[0][2])
                    angle = -1 if raw_int == [] else int(
                        raw_int[0])  # -1 for top-view, else angle
                    camera_type = raw_name[0][0]
                    perspective = raw_name[0][1].lower()

                    for pth in img_paths:
                        if re.findall(f['filename'], pth):
                            file_objs.append({
                                'perspective': 'side-view' if perspective == 'sv' else 'top-view',
                                'angle': angle,
                                'camera_type': 'visible/RGB' if camera_type == 'vis' else 'near-infrared',
                                'image_path': pth,
                                'image_id': image_id,
                                'experiment_id': 'unknown',
                                'filename': f['filename']
                            })

        # sort file objs by angle
        file_objs = sorted(file_objs, key=lambda k: k['angle'])

        # process images by matching angles with plantcv
        for i in range(0, len(file_objs), 2):
            if file_objs[i]['camera_type'] == 'visible/RGB':
                vis_src = file_objs[i]['image_path']
                nir_src = file_objs[i + 1]['image_path']
                vis_id = file_objs[i]['image_id']
                nir_id = file_objs[i + 1]['image_id']
                experiment_id = file_objs[i]['experiment_id']
                vis_filename = file_objs[i]['filename']
                nir_filename = file_objs[i + 1]['filename']
            else:
                vis_src = file_objs[i + 1]['image_path']
                nir_src = file_objs[i]['image_path']
                vis_id = file_objs[i + 1]['image_id']
                nir_id = file_objs[i]['image_id']
                experiment_id = file_objs[i + 1]['experiment_id']
                vis_filename = file_objs[i + 1]['filename']
                nir_filename = file_objs[i]['filename']
            logging.info(
                '...processing: %s + %s' %
                (os.path.basename(vis_src), os.path.basename(nir_src)))

            # Read VIS image
            img, path, filename = pcv.readimage(vis_src)
            brass_mask = cv2.imread('masks/mask_brass_tv_z1_L1.png')
            # Read NIR image
            nir, path1, filename1 = pcv.readimage(nir_src)
            nir2 = cv2.imread(nir_src, -1)

            try:
                vis_out = os.path.join(self.output_dir,
                                       resource['dataset_info']['name'],
                                       vis_filename)
                nir_out = os.path.join(self.output_dir,
                                       resource['dataset_info']['name'],
                                       nir_filename)
                if i == 0:
                    vn_traits = pcia.process_tv_images_core(
                        vis_id, img, nir_id, nir, nir2, brass_mask, traits,
                        experiment_id, vis_out, nir_out)
                else:
                    vn_traits = pcia.process_sv_images_core(
                        vis_id, img, nir_id, nir, nir2, traits, experiment_id,
                        vis_out, nir_out)

                logging.getLogger(__name__).info(
                    "...uploading resulting metadata")
                # upload the individual file metadata
                metadata = build_metadata(host, self.extractor_info, vis_id,
                                          vn_traits[0], 'file')
                upload_file_metadata(connector, host, secret_key, vis_id,
                                     metadata)
                metadata = build_metadata(host, self.extractor_info, nir_id,
                                          vn_traits[1], 'file')
                upload_file_metadata(connector, host, secret_key, nir_id,
                                     metadata)
                # Add PlantCV analysis images to dataset
                for image in vn_traits[2]:
                    pyclowder.files.upload_to_dataset(connector, host,
                                                      secret_key,
                                                      resource['id'], image)

            except Exception as e:
                logging.getLogger(__name__).error(
                    "...error generating vn_traits data; no metadata uploaded")
                logging.getLogger(__name__).error(e)

        # compose the summary traits
        trait_list = pcia.generate_traits_list(traits)

        # generate output CSV & send to Clowder + BETY
        tmp_csv = 'avg_traits.csv'
        pcia.generate_average_csv(tmp_csv, fields, trait_list)
        submit_traits(tmp_csv, self.bety_key)

        # Flag dataset as processed by extractor
        metadata = build_metadata(host, self.extractor_info, resource['id'],
                                  {"status": "COMPLETED"}, 'dataset')
        upload_ds_metadata(connector, host, secret_key, resource['id'],
                           metadata)

        self.end_message()
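# Sketch of the VIS/NIR pairing above: sorting file_objs by angle places the
# visible/RGB and near-infrared images of each view next to each other, so the
# loop steps through the list two at a time and branches on which camera came
# first. The entries below are illustrative.
file_objs = sorted([
    {'angle': 0,  'camera_type': 'visible/RGB',   'image_path': 'vis_sv_0.png'},
    {'angle': 0,  'camera_type': 'near-infrared', 'image_path': 'nir_sv_0.png'},
    {'angle': -1, 'camera_type': 'near-infrared', 'image_path': 'nir_tv.png'},
    {'angle': -1, 'camera_type': 'visible/RGB',   'image_path': 'vis_tv.png'},
], key=lambda k: k['angle'])

for i in range(0, len(file_objs), 2):
    pair = file_objs[i:i + 2]
    vis = next(p for p in pair if p['camera_type'] == 'visible/RGB')
    nir = next(p for p in pair if p['camera_type'] == 'near-infrared')
    print(vis['image_path'], '+', nir['image_path'])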