Code example #1
import csv


def update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows):
    """Sends the rows of CSV data to Geostreams.

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        geo_csv_header(str): comma-separated list of column headers
        geo_rows(list): list of comma-separated data-row strings (one per row)

    Notes:
        Expected header names are: 'lat', 'lon', 'dp_time', 'timestamp',
        'source', 'value', and 'trait'
    """
    data = [geo_csv_header]
    data.extend(geo_rows)

    reader = csv.DictReader(data)
    for row in reader:
        centroid_lonlat = [row['lon'], row['lat']]
        time_fmt = row['dp_time']
        timestamp = row['timestamp']
        dpmetadata = {"source": row['source'], "value": row['value']}
        trait = row['trait']

        create_datapoint_with_dependencies(
            connector, host, secret_key, trait,
            (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
            dpmetadata, timestamp)
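
A minimal driver for the function above, assuming a pyclowder connector and credentials are already in hand (all values below are placeholders, not part of the original example):

# Hypothetical header and row matching the documented column contract.
geo_csv_header = "lat,lon,dp_time,timestamp,source,value,trait"
geo_rows = [
    "33.0745,-111.9750,2017-05-15T12:00:00-07:00,2017-05-15,"
    "https://example.org/files/1234,42.5,canopy_cover"
]
update_geostreams(connector, host, secret_key, geo_csv_header, geo_rows)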
Code example #2
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        successful_plots = 0
        with open(resource['local_paths'][0], 'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                centroid_lonlat = [row['lon'], row['lat']]
                time_fmt = row['dp_time']
                timestamp = row['timestamp']
                dpmetadata = {
                    "source": row['source'],
                    "value": row['value']
                }
                trait = row['trait']

                create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                                   (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                                                   dpmetadata, timestamp)
                successful_plots += 1

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata (%s)" % resource['id'])
        ext_meta = build_metadata(host, self.extractor_info, resource['id'], {
            "plots_processed": successful_plots,
        }, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
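
For reference, the CSV file consumed by this extractor would look roughly like this (values are illustrative; the column names follow the header contract documented in example #1):

lat,lon,dp_time,timestamp,source,value,trait
33.0745,-111.9750,2017-05-15T12:00:00-07:00,2017-05-15,https://example.org/files/1234,42.5,canopy_cover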
Code example #3
def upload_to_geostreams(csv_path, clowder_id):
    # NOTE: host and secret_key are assumed to be defined at module level;
    # they are referenced below but not passed in.
    conn = Connector(
        None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    successful_plots = 0
    with open(csv_path, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {"source": row['source'], "value": row['value']}
            trait = row['trait']

            create_datapoint_with_dependencies(
                conn, host, secret_key, trait,
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)
            successful_plots += 1

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.geostreams",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "Geostreams CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
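
A sketch of how this helper might be invoked; host and secret_key are the module-level values it implicitly depends on, and the URL, file name, and Clowder ID below are placeholders:

host = "https://terraref.ncsa.illinois.edu/clowder/"  # placeholder
secret_key = "REPLACE_WITH_API_KEY"                   # placeholder

upload_to_geostreams("geostreams_output.csv", "5a1b2c3d4e5f6a7b8c9d0e1f")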
Code example #4
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        terra_md = resource['metadata']
        ds_info = get_info(connector, host, secret_key, resource['id'])

        # @begin extract_positional_info_from_metadata
        # @in new_dataset_added
        # @out gantry_geometry
        # @end extract_positional_info_from_metadata

        # Get sensor and timestamp from the dataset name
        self.log_info(resource, "Getting position information from metadata")
        (streamprefix, timestamp) = ds_info['name'].split(' - ')
        date = timestamp.split("__")[0]
        scan_time = calculate_scan_time(terra_md)
        streamprefix += " Datasets"
        dpmetadata = {
            "source_dataset": host + ("" if host.endswith("/") else "/") +
                              "datasets/" + resource['id'],
            "dataset_name": ds_info['name']
        }

        centroid = None
        bbox = None
        for entry in terra_md['spatial_metadata']:
            if 'centroid' in terra_md['spatial_metadata'][entry]:
                centroid = terra_md['spatial_metadata'][entry]['centroid']
            if 'bounding_box' in terra_md['spatial_metadata'][entry]:
                bbox = terra_md['spatial_metadata'][entry]['bounding_box']
                bbox = {
                    "type": bbox['type'],
                    "coordinates": [bbox['coordinates']]
                }

        if 'site_metadata' in terra_md:
            # We've already determined the plot associated with this dataset so we can skip some work
            self.log_info(
                resource,
                "Creating datapoint without lookup in %s" % streamprefix)
            create_datapoint_with_dependencies(
                connector, host, secret_key, streamprefix, centroid, scan_time,
                scan_time, dpmetadata, date, bbox,
                terra_md['site_metadata']['sitename'])

        else:
            # We need to do the traditional querying for plot
            self.log_info(
                resource,
                "Creating datapoint with lookup in %s" % streamprefix)
            create_datapoint_with_dependencies(connector, host, secret_key,
                                               streamprefix, centroid,
                                               scan_time, scan_time,
                                               dpmetadata, date, bbox)

        # Attach geometry to Clowder metadata as well
        self.log_info(resource, "Uploading dataset metadata")
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"datapoints_added": 1}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
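
The loop over terra_md['spatial_metadata'] above only assumes the following rough shape; the sensor key, coordinates, and sitename here are illustrative, not taken from real metadata:

terra_md = {
    "spatial_metadata": {
        "flirIrCamera": {                       # one entry per sensor
            "centroid": {"type": "Point",
                         "coordinates": [-111.975, 33.0745]},
            "bounding_box": {
                "type": "Polygon",
                # a single ring of [lon, lat] points; the extractor wraps
                # this list once more to form valid GeoJSON coordinates
                "coordinates": [[-111.976, 33.074], [-111.974, 33.074],
                                [-111.974, 33.075], [-111.976, 33.075]]
            }
        }
    },
    # optional; when present the plot lookup is skipped
    "site_metadata": {"sitename": "Example Plot Name"}
}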
Code example #5
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "canopycovertraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = ccCore.get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue
                ccVal = ccCore.gen_cc_for_img(rollaxis(pxarray, 0, 3), 5)
                ccVal *= 100.0  # Make 0-100 instead of 0-1
                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception:
                logging.error("error generating cc for %s" % plotname)
                continue

            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = ccCore.generate_traits_list(traits)

            csv_file.write(','.join(map(str, trait_list)) + '\n')

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + ("" if host.endswith("/") else "/") +
                          "files/" + resource['id'],
                "canopy_cover": ccVal
            }
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed": successful_plots,
                "plots_skipped": len(all_plots) - successful_plots,
                "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=canopy_cover"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
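
The timestamp that drives the plot query above comes straight out of the dataset name; a minimal sketch of that convention, with a hypothetical name:

# Hypothetical dataset name; the extractor assumes "<product> - <date>".
ds_name = "fullfield - 2017-05-15"
timestamp = ds_name.split(" - ")[1]    # "2017-05-15"
local_dt = timestamp + "T12:00:00"     # local noon, as written to the CSV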
Code example #6
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        tmp_csv = "meantemptraits.csv"
        csv_file = open(tmp_csv, 'w')
        (fields, traits) = get_traits_table()
        csv_file.write(','.join(map(str, fields)) + '\n')

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        dsmd = download_metadata(connector, host, secret_key,
                                 resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                              bounds)
            #tc = getFlir.rawData_to_temperature(pxarray, terramd) # get temperature
            # Filter out invalid (negative) raw values before averaging
            pxarray[pxarray < 0] = numpy.nan
            mean_tc = numpy.nanmean(pxarray) - 273.15  # Kelvin to Celsius

            # Create BETY-ready CSV
            if not numpy.isnan(mean_tc):
                traits['surface_temperature'] = str(mean_tc)
                traits['site'] = plotname
                traits['local_datetime'] = timestamp + "T12:00:00"
                trait_list = generate_traits_list(traits)
                #generate_csv(tmp_csv, fields, trait_list)
                csv_file.write(','.join(map(str, trait_list)) + '\n')

                # Prepare and submit datapoint
                centroid_lonlat = json.loads(
                    centroid_from_geojson(bounds))["coordinates"]
                time_fmt = timestamp + "T12:00:00-07:00"
                dpmetadata = {
                    "source": host + ("" if host.endswith("/") else "/") +
                              "files/" + resource['id'],
                    "surface_temperature": str(mean_tc)
                }
                create_datapoint_with_dependencies(
                    connector, host, secret_key, "IR Surface Temperature",
                    (centroid_lonlat[1], centroid_lonlat[0]), time_fmt,
                    time_fmt, dpmetadata, timestamp)

            successful_plots += 1

        # submit CSV to BETY
        csv_file.close()
        submit_traits(tmp_csv, betykey=self.bety_key)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(
            host, self.extractor_info, resource['parent']['id'], {
                "plots_processed": successful_plots,
                "plots_skipped": len(all_plots) - successful_plots,
                "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"
            }, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        metadata)

        self.end_message()
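
The averaging step above depends on NaN masking so that invalid pixels do not skew the plot mean; a self-contained sketch of the same idea with made-up values:

import numpy

pxarray = numpy.array([[300.2, 301.0], [-1.0, 299.8]])  # Kelvin; -1 = invalid
pxarray[pxarray < 0] = numpy.nan           # mask invalid readings
mean_tc = numpy.nanmean(pxarray) - 273.15  # mean of valid pixels, in Celsius
print(round(mean_tc, 2))                   # 27.18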
Code example #7
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get full list of experiment plots using date as filter
        ds_info = get_info(connector, host, secret_key,
                           resource['parent']['id'])
        timestamp = ds_info['name'].split(" - ")[1]
        all_plots = get_site_boundaries(timestamp, city='Maricopa')

        successful_plots = 0
        for plotname in all_plots:
            bounds = all_plots[plotname]

            # Use GeoJSON string to clip full field to this plot
            try:
                (pxarray, geotrans) = clip_raster(resource['local_paths'][0],
                                                  bounds)
                if len(pxarray.shape) < 3:
                    logging.error("unexpected array shape for %s (%s)" %
                                  (plotname, pxarray.shape))
                    continue

                plot_img = create_image(pxarray, "plot_image.png")
                plot_csv = "plot.csv"
                self.generate_table_only(plot_img, plot_csv)
                trait_vals = self.extract_vals_from_csv(plot_csv)

                successful_plots += 1
                if successful_plots % 10 == 0:
                    logging.info("processed %s/%s plots successfully" %
                                 (successful_plots, len(all_plots)))
            except Exception:
                logging.error("error generating traits for %s" % plotname)
                continue

            # Create BETY-ready CSV
            (fields, traits) = self.get_traits_table()
            for tr in trait_vals:
                traits[tr] = str(trait_vals[tr])
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = self.generate_traits_list(traits)
            self.generate_cc_csv(plot_csv, fields, trait_list)

            # submit CSV to BETY
            submit_traits(plot_csv, betykey=self.bety_key)

            # Prepare and submit datapoint
            centroid_lonlat = json.loads(
                centroid_from_geojson(bounds))["coordinates"]
            time_fmt = timestamp + "T12:00:00-07:00"
            dpmetadata = {
                "source": host + ("" if host.endswith("/") else "/") +
                          "files/" + resource['id'],
            }
            for tr in trait_vals:
                dpmetadata[tr] = str(trait_vals[tr])
            create_datapoint_with_dependencies(
                connector, host, secret_key, "Canopy Cover",
                (centroid_lonlat[1], centroid_lonlat[0]), time_fmt, time_fmt,
                dpmetadata, timestamp)

            os.remove(plot_img)
            os.remove(plot_csv)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(
            host,
            self.extractor_info,
            resource['parent']['id'],
            {
                "plots_processed": successful_plots,
                "plots_skipped": len(all_plots) - successful_plots
                # TODO: add link to BETY trait IDs
            },
            'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
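
One detail shared by every example in this section: centroid_from_geojson returns GeoJSON-ordered [lon, lat] coordinates, while create_datapoint_with_dependencies receives (lat, lon), hence the swapped indices in each call. A standalone illustration:

import json

geojson = '{"type": "Point", "coordinates": [-111.975, 33.0745]}'  # [lon, lat]
centroid_lonlat = json.loads(geojson)["coordinates"]
latlon = (centroid_lonlat[1], centroid_lonlat[0])  # (33.0745, -111.975)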