    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Performs plot level image extraction

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """
        self.start_message(resource)
        super(ClipByShape, self).process_message(connector, host, secret_key,
                                                 resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Initialize local variables
        dataset_name = parameters["datasetname"]
        season_name, experiment_name = "Unknown Season", "Unknown Experiment"
        datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

        # Array containing the links to uploaded files
        uploaded_file_ids = []

        # Find the files we're interested in
        # pylint: disable=line-too-long
        (shapefile, shxfile, dbffile,
         imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                   resource['triggering_file'])
        # pylint: enable=line-too-long
        if shapefile is None:
            self.log_skip(resource, "No shapefile found")
            return
        if shxfile is None:
            self.log_skip(resource, "No SHX file found")
            return
        num_image_files = len(imagefiles)
        if num_image_files <= 0:
            self.log_skip(resource,
                          "No image files with geographic boundaries found")
            return

        # Get the best username, password, and space
        old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass,
                                     self.clowderspace)
        self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context(
        )

        # Ensure that the clowder information is valid
        if not confirm_clowder_info(host, secret_key, self.clowderspace,
                                    self.clowder_user, self.clowder_pass):
            self.log_error(resource, "Clowder configuration is invalid. Not processing " +\
                                     "request")
            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
            return

        # Change the base path of files to include the user by tweaking the sensor's value
        sensor_old_base = None
        if self.get_terraref_metadata is None:
            _, new_base = self.get_username_with_base_path(
                host, secret_key, resource['id'], self.sensors.base)
            sensor_old_base = self.sensors.base
            self.sensors.base = new_base

        try:
            # Build up a list of image IDs
            image_ids = {}
            if 'files' in resource:
                for one_image in imagefiles:
                    image_name = os.path.basename(one_image)
                    for res_file in resource['files']:
                        if ('filename' in res_file) and ('id' in res_file) and \
                                                            (image_name == res_file['filename']):
                            image_ids[image_name] = res_file['id']

            # Get timestamps. Also get season and experiment information for Clowder collections
            datestamp = self.find_datestamp(dataset_name)
            timestamp = timestamp_to_terraref(
                self.find_timestamp(dataset_name))
            (season_name, experiment_name,
             _) = self.get_season_and_experiment(datestamp, self.sensor_name)

            if self.experiment_metadata:
                if 'extractors' in self.experiment_metadata:
                    extractor_json = self.experiment_metadata['extractors']
                    if 'shapefile' in extractor_json:
                        if 'plot_column_name' in extractor_json['shapefile']:
                            plot_name_idx = extractor_json['shapefile'][
                                'plot_column_name']

            # Check our current local variables
            if dbffile is None:
                self.log_info(resource,
                              "DBF file not found, using default plot naming")
            self.log_info(resource, "Extracting plots using shapefile '" + \
                                                        os.path.basename(shapefile) + "'")

            # Load the shapes and find the plot name column if we have a DBF file
            shape_in = ogr.Open(shapefile)
            layer = shape_in.GetLayer(
                os.path.split(os.path.splitext(shapefile)[0])[1])
            feature = layer.GetNextFeature()
            layer_ref = layer.GetSpatialRef()

            if dbffile:
                shape_table = DBF(dbffile,
                                  lowernames=True,
                                  ignore_missing_memofile=True)
                shape_rows = iter(list(shape_table))

                # Make sure if we have the column name of plot-names specified that it exists in
                # the shapefile
                column_names = shape_table.field_names
                if plot_name_idx is not None:
                    if not find_all_plot_names(plot_name_idx, column_names):
                        raise ValueError(
                            "Shapefile data does not have specified plot name"
                            + " column '" + plot_name_idx + "'")

                # Lookup a plot name field to use
                if plot_name_idx is None:
                    for one_name in column_names:
                        # pylint: disable=line-too-long
                        if one_name == "observationUnitName":
                            plot_name_idx = one_name
                            break
                        elif (one_name.find('plot') >= 0) and (
                                (one_name.find('name') >= 0)
                                or (one_name.find('id') >= 0)):
                            plot_name_idx = one_name
                            break
                        elif one_name == 'id':
                            plot_name_idx = one_name
                            break
                        # pylint: enable=line-too-long
                if plot_name_idx is None:
                    raise ValueError(
                        "Shapefile data does not have a plot name field '" +
                        os.path.basename(dbffile) + "'")

            # Setup for the extracted plot images
            plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + \
                                                                                    " (By Plot)"

            # Loop through each polygon and extract plot level data
            alternate_plot_id = 0
            while feature:

                # Current geometry to extract
                plot_poly = feature.GetGeometryRef()
                if layer_ref:
                    plot_poly.AssignSpatialReference(layer_ref)
                plot_spatial_ref = plot_poly.GetSpatialReference()

                # Determine the plot name to use
                plot_name = None
                alternate_plot_id = alternate_plot_id + 1
                if shape_rows and plot_name_idx:
                    try:
                        row = next(shape_rows)
                        plot_name = get_plot_name(plot_name_idx, row)
                    except StopIteration:
                        pass
                if not plot_name:
                    plot_name = "plot_" + str(alternate_plot_id)

                # Determine output dataset name
                leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
                self.log_info(
                    resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                    (season_name, experiment_name, plot_display_name,
                     datestamp[:4], datestamp[5:7], datestamp[8:10],
                     leaf_dataset))

                # Create the dataset, even if we have no data to put in it, so that the caller knows
                # it was addressed
                target_dsid = build_dataset_hierarchy_crawl(
                    host,
                    secret_key,
                    self.clowder_user,
                    self.clowder_pass,
                    self.clowderspace,
                    season_name,
                    experiment_name,
                    plot_display_name,
                    datestamp[:4],
                    datestamp[5:7],
                    datestamp[8:10],
                    leaf_ds_name=leaf_dataset)

                # Loop through all the images looking for overlap
                for filename in imagefiles:

                    # Get the bounds. We also get the reference systems in case we need to convert
                    # between them
                    bounds = imagefiles[filename]['bounds']
                    bounds_spatial_ref = bounds.GetSpatialReference()

                    # Check for geographic overlap and skip the image if there is none
                    intersection = None
                    if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                        # We need to convert coordinate systems before an intersection
                        transform = osr.CoordinateTransformation(
                            bounds_spatial_ref, plot_spatial_ref)
                        new_bounds = bounds.Clone()
                        if new_bounds:
                            new_bounds.Transform(transform)
                            intersection = plot_poly.Intersection(new_bounds)
                            new_bounds = None
                    else:
                        # Same coordinate system. Simple intersection
                        intersection = plot_poly.Intersection(bounds)

                    if intersection is None or intersection.GetArea() == 0.0:
                        self.log_info(resource, "Skipping image: " + filename)
                        continue

                    # Determine where we're putting the clipped file on disk and whether to overwrite it
                    # pylint: disable=unexpected-keyword-arg
                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        filename=os.path.basename(filename),
                        plot=plot_name,
                        subsensor=self.sensor_name)
                    if file_exists(out_file) and not self.overwrite:
                        # The file exists and we don't want to overwrite it
                        self.logger.warn("Skipping existing output file: %s",
                                         out_file)
                        continue

                    self.log_info(
                        resource, "Attempting to clip '" + filename +
                        "' to polygon number " + str(alternate_plot_id))

                    # Create destination folder on disk if we haven't done that already
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    # Clip the raster
                    bounds_tuple = polygon_to_tuples_transform(
                        plot_poly, bounds_spatial_ref)

                    clip_pix = clip_raster(filename,
                                           bounds_tuple,
                                           out_path=out_file)
                    if clip_pix is None:
                        self.log_error(
                            resource,
                            "Failed to clip image to plot name " + plot_name)
                        continue

                    # Upload the clipped image to the dataset
                    found_in_dest = check_file_in_dataset(
                        connector,
                        host,
                        secret_key,
                        target_dsid,
                        out_file,
                        remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        image_name = os.path.basename(filename)
                        content = {
                            "comment":
                            "Clipped from shapefile " +
                            os.path.basename(shapefile),
                            "imageName":
                            image_name
                        }
                        if image_name in image_ids:
                            content['imageID'] = image_ids[image_name]

                        fileid = upload_to_dataset(connector, host,
                                                   self.clowder_user,
                                                   self.clowder_pass,
                                                   target_dsid, out_file)
                        uploaded_file_ids.append(fileid)

                        # Generate our metadata
                        meta = build_metadata(host, self.extractor_info,
                                              fileid, content, 'file')
                        clowder_file.upload_metadata(connector, host,
                                                     secret_key, fileid, meta)
                    else:
                        self.logger.warn(
                            "Skipping existing file in dataset: %s", out_file)

                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                # Get the next shape to extract
                feature = layer.GetNextFeature()

            # Tell Clowder this is completed so subsequent file updates don't daisy-chain
            id_len = len(uploaded_file_ids)
            if id_len > 0 or self.created > 0:
                extractor_md = build_metadata(
                    host, self.extractor_info, resource['id'],
                    {"files_created": uploaded_file_ids}, 'dataset')
                self.log_info(
                    resource,
                    "Uploading shapefile plot extractor metadata to Level_2 dataset: "
                    + str(extractor_md))
                clowder_dataset.remove_metadata(connector, host, secret_key,
                                                resource['id'],
                                                self.extractor_info['name'])
                clowder_dataset.upload_metadata(connector, host, secret_key,
                                                resource['id'], extractor_md)
            else:
                self.logger.warn(
                    "Skipping dataset metadata updating since no files were loaded"
                )

        finally:
            # Signal the end of message processing and restore changed variables. Be sure to also
            # restore changed variables before any early returns above
            if sensor_old_base is not None:
                self.sensors.base = sensor_old_base

            self.clowder_user, self.clowder_pass, self.clowderspace = (
                old_un, old_pw, old_space)
            self.end_message(resource)
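
The overlap check above reprojects the image bounds into the plot's spatial reference before intersecting. Below is a minimal, self-contained sketch of that reproject-then-intersect pattern using GDAL's OGR/OSR Python bindings; the polygon coordinates are made up for illustration and are not taken from the extractor.

from osgeo import ogr, osr

def intersect_in_common_crs(poly_a, poly_b):
    # Reproject poly_b into poly_a's spatial reference (if they differ) and intersect
    ref_a = poly_a.GetSpatialReference()
    ref_b = poly_b.GetSpatialReference()
    if ref_a and ref_b and not ref_a.IsSame(ref_b):
        poly_b = poly_b.Clone()
        poly_b.Transform(osr.CoordinateTransformation(ref_b, ref_a))
    return poly_a.Intersection(poly_b)

wgs84 = osr.SpatialReference()
wgs84.ImportFromEPSG(4326)
if hasattr(osr, 'OAMS_TRADITIONAL_GIS_ORDER'):
    wgs84.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)  # keep x=lon, y=lat on GDAL 3+

plot = ogr.CreateGeometryFromWkt(
    "POLYGON((-111.975 33.075, -111.974 33.075, -111.974 33.076, -111.975 33.076, -111.975 33.075))")
plot.AssignSpatialReference(wgs84)
image_bounds = ogr.CreateGeometryFromWkt(
    "POLYGON((-111.9745 33.0752, -111.9738 33.0752, -111.9738 33.0758, -111.9745 33.0758, -111.9745 33.0752))")
image_bounds.AssignSpatialReference(wgs84)

print(intersect_in_common_crs(plot, image_bounds).GetArea() > 0.0)  # True: the footprints overlap
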
Example #2
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Build list of JSON files
        json_files = []
        for f in resource['files']:
            if f['filename'].endswith("_environmentlogger.json"):
                if f['filepath'].startswith("/home/clowder"):
                    json_files.append(f['filepath'].replace(
                        "/home/clowder", "/home/extractor"))
                else:
                    json_files.append(f['filepath'])
        json_files.sort()

        # Determine full output path
        timestamp = resource['name'].split(" - ")[1]
        out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
        temp_out_full = os.path.join(os.path.dirname(out_fullday_netcdf),
                                     "temp_full.nc")
        temp_out_single = temp_out_full.replace("_full.nc", "_single.nc")
        geo_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")

        if not file_exists(temp_out_full):
            for json_file in json_files:
                self.log_info(
                    resource, "converting %s to netCDF & appending" %
                    os.path.basename(json_file))
                ela.mainProgramTrigger(json_file, temp_out_single)
                cmd = "ncrcat --record_append %s %s" % (temp_out_single,
                                                        temp_out_full)
                subprocess.call(cmd, shell=True)
                os.remove(temp_out_single)

            shutil.move(temp_out_full, out_fullday_netcdf)
            self.created += 1
            self.bytes += os.path.getsize(out_fullday_netcdf)

        # Write out geostreams.csv
        if not file_exists(geo_csv):
            self.log_info(resource, "writing geostreams CSV")
            geo_file = open(geo_csv, 'w')
            geo_file.write(','.join([
                'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value',
                'timestamp'
            ]) + '\n')
            with Dataset(out_fullday_netcdf, "r") as ncdf:
                streams = set([
                    sensor_info.name
                    for sensor_info in ncdf.variables.values()
                    if sensor_info.name.startswith('sensor')
                ])
                for stream in streams:
                    if stream != "sensor_spectrum":
                        try:
                            memberlist = ncdf.get_variables_by_attributes(
                                sensor=stream)
                            for members in memberlist:
                                data_points = _produce_attr_dict(members)
                                for index in range(len(data_points)):
                                    dp_obj = data_points[index]
                                    if dp_obj["sensor"] == stream:
                                        time_format = "%Y-%m-%dT%H:%M:%S-07:00"
                                        time_point = (datetime.datetime(year=1970, month=1, day=1) + \
                                                      datetime.timedelta(days=ncdf.variables["time"][index])).strftime(time_format)

                                        geo_file.write(','.join([
                                            "Full Field - Environmental Logger",
                                            "(EL) %s" % stream,
                                            str(33.075576),
                                            str(-111.974304), time_point,
                                            host +
                                            ("" if host.endswith("/") else "/"
                                             ) + "datasets/" + resource['id'],
                                            '"%s"' % json.dumps(dp_obj).
                                            replace('"', '""'), timestamp
                                        ]) + '\n')

                        except Exception:
                            self.log_error(
                                resource,
                                "NetCDF attribute not found: %s" % stream)

            # Close the CSV so it is flushed before any upload below
            geo_file.close()

        # Fetch dataset ID by dataset name if not provided
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            None,
            None,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        ds_files = get_file_list(connector, host, secret_key, target_dsid)
        found_full = False
        found_csv = False
        for f in ds_files:
            if f['filename'] == os.path.basename(out_fullday_netcdf):
                found_full = True
            if f['filename'] == os.path.basename(geo_csv):
                found_csv = True
        if not found_full:
            upload_to_dataset(connector, host, secret_key, target_dsid,
                              out_fullday_netcdf)
        if not found_csv:
            geoid = upload_to_dataset(connector, host, secret_key, target_dsid,
                                      geo_csv)
            self.log_info(resource,
                          "triggering geostreams extractor on %s" % geoid)
            submit_extraction(connector, host, secret_key, geoid,
                              "terra.geostreams")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"output_dataset": target_dsid}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
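
A quick sketch of the time conversion used when writing geostreams.csv above: the netCDF "time" variable holds fractional days since 1970-01-01, which the extractor renders with a fixed -07:00 (MST) offset. This standalone helper is an illustration, not part of the extractor.

import datetime

def days_since_epoch_to_terraref_time(days, time_format="%Y-%m-%dT%H:%M:%S-07:00"):
    # Convert fractional days since 1970-01-01 into the timestamp string used in geostreams.csv
    epoch = datetime.datetime(year=1970, month=1, day=1)
    return (epoch + datetime.timedelta(days=float(days))).strftime(time_format)

print(days_since_epoch_to_terraref_time(17532.5))  # 2018-01-01T12:00:00-07:00
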
Example #3
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
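
The FLIR processing above reads the raw .bin file as a 480x640 little-endian uint16 frame and rotates it before creating the PNG and GeoTIFF. A minimal sketch of that load step, plus a simple 8-bit preview scaling, follows; the file path and the preview scaling are illustrative assumptions, not the extractor's create_image() logic.

import numpy

def load_flir_frame(bin_path, shape=(480, 640)):
    # Raw FLIR frame: little-endian unsigned 16-bit values, row-major 480x640
    raw = numpy.fromfile(bin_path, numpy.dtype('<u2')).reshape(shape).astype('float')
    return numpy.rot90(raw, 3)  # rotate 270 degrees to match the camera mounting

def to_preview_uint8(frame):
    # Stretch the frame to 0-255 for a quick visual check (not the extractor's scaling)
    lo, hi = float(frame.min()), float(frame.max())
    if hi <= lo:
        return numpy.zeros(frame.shape, dtype='uint8')
    return ((frame - lo) / (hi - lo) * 255).astype('uint8')

# frame = load_flir_frame("/path/to/example_ir.bin")  # hypothetical path
# preview = to_preview_uint8(frame)
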
Example #4
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        """Process the message requesting the ODM extractor to run

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): dictionary containing the resources associated with the request
            parameters(json): json object of the triggering message contents
        """

        # Start of message processing
        self.start_message(resource)
        TerrarefExtractor.process_message(self, connector, host, secret_key,
                                          resource, parameters)

        # Handle any parameters
        if isinstance(parameters, basestring):
            parameters = json.loads(parameters)
        if isinstance(parameters, unicode):
            parameters = json.loads(str(parameters))

        # Array of files to upload once processing is done
        self.files_to_upload = []

        # Our cache of files to upload
        self.cache_folder = tempfile.mkdtemp()

        # We are only handling one sensor type here. ODM generates additional sensor outputs
        # that may not be available for upload; we handle those as we see them in upload_file()
        # above
        sensor_type = "rgb"

        # Initialize more local variables
        scan_name = parameters["scan_type"] if "scan_type" in parameters else ""

        # Setup overrides and get the restore function
        restore_fn = self.setup_overrides(host, secret_key, resource)
        if not restore_fn:
            self.end_message(resource)
            return

        try:
            # Get the best timestamp
            timestamp = timestamp_to_terraref(
                self.find_timestamp(resource,
                                    resource['dataset_info']['name']))
            season_name, experiment_name, _ = self.get_season_and_experiment(
                timestamp, self.sensor_name)

            # Generate the file names
            out_tif_full = self.sensors.get_sensor_path(
                timestamp, opts=[sensor_type, scan_name]).replace(" ", "_")
            out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
            out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
            out_png = out_tif_medium.replace(".tif", ".png")
            out_dir = os.path.dirname(out_tif_full)

            # Generate dictionary of sensor output folders and file names
            sensor_maps = {
                sensor_type: {
                    "dir": out_dir,
                    "name": os.path.basename(out_tif_full)
                }
            }
            fsm = self.filename_sensor_maps
            for one_map in fsm:
                cur_sensor = fsm[one_map]
                if not cur_sensor in sensor_maps:
                    sensor_path = self.sensors.get_sensor_path(
                        timestamp,
                        sensor=cur_sensor,
                        opts=[cur_sensor, scan_name]).replace(" ", "_")

                    sensor_maps[cur_sensor] = {
                        "dir": os.path.dirname(sensor_path),
                        "name": os.path.basename(sensor_path)
                    }
            self.sensor_maps = sensor_maps

            # Only generate what we need to by checking files on disk
            thumb_exists, med_exists, full_exists, png_exists, only_png = \
                                                                False, False, False, False, False

            if file_exists(out_tif_thumb):
                thumb_exists = True
            if file_exists(out_tif_medium):
                med_exists = True
            if file_exists(out_tif_full):
                full_exists = True
            if file_exists(out_png):
                png_exists = True
            if thumb_exists and med_exists and full_exists and not self.overwrite_ok:
                if png_exists:
                    self.log_skip(resource, "all outputs already exist")
                    return
                else:
                    self.log_info(resource, "all outputs already exist (10% PNG thumbnail must" \
                                            " still be generated)")
                    only_png = True

            # If we need the whole set of files, create them
            if not only_png:
                # Override the output file name. We don't save anything here because we'll override
                # it the next time through
                self.args.orthophotoname = os.path.splitext(
                    os.path.basename(out_tif_full))[0]

                # Run the stitch process
                OpenDroneMapStitch.process_message(self, connector, host,
                                                   secret_key, resource,
                                                   parameters)

                # Look up the name of the full sized orthomosaic
                basename = os.path.basename(out_tif_full)
                srcname = None
                for f in self.files_to_upload:
                    if f["dest_name"] == basename:
                        srcname = os.path.join(self.cache_folder,
                                               f["source_name"])
                        break

                # Generate other file sizes from the original orthomosaic
                if srcname and not file_exists(out_tif_medium):
                    self.log_info(
                        resource,
                        "Converting orthomosaic to %s..." % out_tif_medium)
                    outname = os.path.join(self.cache_folder,
                                           os.path.basename(out_tif_medium))
                    cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (
                        10, 10, srcname, outname)
                    subprocess.call(cmd, shell=True)

                if srcname and not file_exists(out_tif_thumb):
                    self.log_info(
                        resource,
                        "Converting orthomosaic to %s..." % out_tif_thumb)
                    outname = os.path.join(self.cache_folder,
                                           os.path.basename(out_tif_thumb))
                    cmd = "gdal_translate -outsize %s%% %s%% %s %s" % (
                        2, 2, srcname, outname)
                    subprocess.call(cmd, shell=True)

            # We're here due to possibly needing the PNG Thumbnail
            srcname = os.path.join(self.cache_folder,
                                   os.path.basename(out_tif_medium))
            if (only_png or not png_exists) and file_exists(srcname):
                # Create PNG thumbnail
                self.log_info(resource, "Converting 10pct to %s..." % out_png)
                outname = os.path.join(self.cache_folder,
                                       os.path.basename(out_png))
                cmd = "gdal_translate -of PNG %s %s" % (srcname, outname)
                subprocess.call(cmd, shell=True)

            # Get dataset ID or create it, creating parent collections as needed
            leaf_ds_name = self.sensors.get_display_name() + ' - ' + timestamp
            ds_exists = get_datasetid_by_name(host, secret_key, leaf_ds_name)
            target_dsid = build_dataset_hierarchy_crawl(
                host,
                secret_key,
                self.clowder_user,
                self.clowder_pass,
                self.clowderspace,
                season_name,
                experiment_name,
                self.sensors.get_display_name(),
                timestamp[:4],
                timestamp[5:7],
                timestamp[8:10],
                leaf_ds_name=leaf_ds_name)

            if (self.overwrite_ok
                    or not ds_exists) and self.experiment_metadata:
                self.update_dataset_extractor_metadata(
                    connector, host, secret_key, target_dsid,
                    prepare_pipeline_metadata(self.experiment_metadata),
                    self.extractor_info['name'])

            # Store our dataset mappings for possible later use
            self.sensor_dsid_map = {sensor_type: target_dsid}

            # Upload full field image to Clowder
            file_ids = []
            if "files" in resource:
                for one_file in resource["files"]:
                    file_ids.append(one_file.get("id", ""))
            content = {
                "comment": "This stitched file is computed using OpenDroneMap. Change the" \
                           " parameters in extractors-opendronemap.txt to change the results.",
                "source_file_ids": ", ".join(file_ids)
            }

            # If we newly created these files, upload to Clowder
            file_name = os.path.basename(out_tif_thumb)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not thumb_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            file_name = os.path.basename(out_tif_medium)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not med_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            file_name = os.path.basename(out_png)
            file_path = os.path.join(self.cache_folder, file_name)
            if file_exists(file_path) and not png_exists:
                self.files_to_upload.append({
                    "source_path": self.cache_folder,
                    "source_name": file_name,
                    "dest_path": out_dir,
                    "dest_name": file_name,
                    "compress": False
                })

            # The main orthomosaic is already getting uploaded, but we must make sure its path
            # is correct
            srcname = os.path.basename(out_tif_full).lower()
            for one_file in self.files_to_upload:
                file_name = os.path.basename(one_file["dest_name"]).lower()
                if file_name == srcname:
                    one_file["dest_path"] = os.path.dirname(out_tif_full)
                    break

            # This function uploads the files into their appropriate datasets
            self.perform_uploads(connector, host, secret_key, resource,
                                 target_dsid, content, season_name,
                                 experiment_name, timestamp)

            # Clean up all destination folders, skipping over ones that are in our "base" path
            # (we want to keep those)
            base = self.sensors.base
            if not self.cache_folder.startswith(base):
                check_delete_folder(self.cache_folder)
            for sp in self.sensor_maps:
                if not self.sensor_maps[sp]["dir"].startswith(base):
                    check_delete_folder(self.sensor_maps[sp]["dir"])

        finally:
            # We are done, restore fields we've modified (also be sure to restore fields in the
            # early returns in the code above)
            if restore_fn:
                restore_fn()
            self.end_message(resource)
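
The ODM extractor above shells out to gdal_translate to build the 10% GeoTIFF and the PNG thumbnail. Here is a hedged sketch of those two conversions using argument lists (avoiding shell=True); the file names are hypothetical and gdal_translate must be on PATH.

import subprocess

def downsample_geotiff(src_path, dst_path, percent):
    # Resize a GeoTIFF to `percent` of its original width/height
    return subprocess.call(["gdal_translate", "-outsize",
                            "%d%%" % percent, "%d%%" % percent, src_path, dst_path])

def geotiff_to_png(src_path, dst_path):
    # Convert a GeoTIFF to PNG for quick previews
    return subprocess.call(["gdal_translate", "-of", "PNG", src_path, dst_path])

# downsample_geotiff("orthomosaic.tif", "orthomosaic_10pct.tif", 10)   # hypothetical paths
# geotiff_to_png("orthomosaic_10pct.tif", "orthomosaic_10pct.png")
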
Example #5
    def perform_uploads(self, connector, host, secret_key, resource,
                        default_dsid, content, season_name, experiment_name,
                        timestamp):
        """Perform the uploading of all the files we're put onto the upload list

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            default_dsid(str): the default dataset to load files to
            content(str): content information for the files we're uploading
            season_name(str): the name of the season
            experiment_name(str): the name of the experiment
            timestamp(str): the timestamp string associated with the source dataset

        Notes:
            We loop through the files, compressing, and remapping the names as needed.
            If the sensor associated with the file is missing, we upload the file to
            the default dataset. Otherwise, we use the dataset associated with the sensor
            and create the dataset if necessary
        """
        for one_file in self.files_to_upload:
            sourcefile = os.path.join(one_file["source_path"],
                                      one_file["source_name"])

            # Make sure we have the original file, then compress or rename it as needed
            if os.path.isfile(sourcefile):
                # make sure we have the full destination path
                if not os.path.exists(one_file["dest_path"]):
                    os.makedirs(one_file["dest_path"])

                resultfile = os.path.join(one_file["dest_path"],
                                          one_file["dest_name"])
                if one_file["compress"]:
                    resultfile = resultfile + ".zip"
                    with open(sourcefile, 'rb') as f_in:
                        with gzip.open(resultfile, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                elif not sourcefile == resultfile:
                    shutil.move(sourcefile, resultfile)

                # Find or create the target dataset for this entry if it doesn't exist
                cur_dataset_id = default_dsid
                if "sensor" in one_file:
                    sensor_type = one_file["sensor"]
                    if sensor_type in self.sensor_dsid_map:
                        cur_dataset_id = self.sensor_dsid_map[sensor_type]
                    else:
                        new_sensor = Sensors(base=self.sensors.base,
                                             station=self.sensors.station,
                                             sensor=sensor_type)

                        sensor_leaf_name = new_sensor.get_display_name(
                        ) + ' - ' + timestamp
                        ds_exists = get_datasetid_by_name(
                            host, secret_key, sensor_leaf_name)
                        new_dsid = build_dataset_hierarchy_crawl(
                            host,
                            secret_key,
                            self.clowder_user,
                            self.clowder_pass,
                            self.clowderspace,
                            season_name,
                            experiment_name,
                            new_sensor.get_display_name(),
                            timestamp[:4],
                            timestamp[5:7],
                            timestamp[8:10],
                            leaf_ds_name=sensor_leaf_name)

                        if (self.overwrite_ok
                                or not ds_exists) and self.experiment_metadata:
                            self.update_dataset_extractor_metadata(
                                connector, host, secret_key, new_dsid,
                                prepare_pipeline_metadata(
                                    self.experiment_metadata),
                                self.extractor_info['name'])

                        self.sensor_dsid_map[sensor_type] = new_dsid
                        cur_dataset_id = new_dsid

                # Check if file already exists in the dataset
                file_in_dataset = check_file_in_dataset(connector,
                                                        host,
                                                        secret_key,
                                                        cur_dataset_id,
                                                        resultfile,
                                                        remove=False)

                # If the file is already in the dataset, determine if we need to delete it first
                if self.overwrite_ok and file_in_dataset:
                    # Delete the file from the dataset before uploading the new copy
                    self.log_info(
                        resource,
                        "Removing existing file in dataset " + resultfile)
                    check_file_in_dataset(connector,
                                          host,
                                          secret_key,
                                          cur_dataset_id,
                                          resultfile,
                                          remove=True)
                elif not self.overwrite_ok and file_in_dataset:
                    # We won't overwrite an existing file
                    self.log_skip(
                        resource, "Not overwriting existing file in dataset " +
                        resultfile)
                    continue

                # Upload the file to the dataset
                fid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, cur_dataset_id,
                                        resultfile)

                # Generate our metadata
                meta = build_metadata(host, self.extractor_info, fid, content,
                                      'file')

                # Upload the metadata to the file
                upload_metadata(connector, host, secret_key, fid, meta)

                self.created += 1
                self.bytes += os.path.getsize(resultfile)
            else:
                raise Exception("%s was not found" % sourcefile)
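
perform_uploads() above compresses or moves each staged file into its destination path before uploading. A minimal standalone sketch of that step follows; note that, matching the extractor, the compressed output is a gzip stream even though the name ends in ".zip". Paths are hypothetical.

import gzip
import os
import shutil

def stage_result(source_file, dest_path, dest_name, compress):
    # Ensure the destination folder exists, then gzip-compress or move the source into place
    if not os.path.exists(dest_path):
        os.makedirs(dest_path)
    result_file = os.path.join(dest_path, dest_name)
    if compress:
        result_file += ".zip"  # gzip data despite the extension, as in the extractor
        with open(source_file, 'rb') as f_in:
            with gzip.open(result_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
    elif source_file != result_file:
        shutil.move(source_file, result_file)
    return result_file
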
Example #6
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Load metadata from dataset
        terra_md_full, spatial_meta = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                sensor_name = "ir_geotiff"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                sensor_name = "rgb_geotiff"
                filename = os.path.basename(f)
                if f.endswith("_left.tif"): side = "left"
                else: side = "right"
                files_to_process[filename] = {
                    "path": f,
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                sensor_name = "laser3d_las"
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata'][
                    'script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' ' + terra_md_full[
                        'gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0],
                                        city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds,
                                                             all_plots,
                                                             fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(
                    resource, "Attempting to clip %s into %s plot shards" %
                    (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
                    tuples = geojson_to_tuples_betydb(
                        yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(
                        sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split(
                        "__")[0]
                    self.log_info(
                        resource,
                        "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                        (season_name, experiment_name, plot_display_name,
                         timestamp[:4], timestamp[5:7], timestamp[8:10],
                         leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(
                        host,
                        secret_key,
                        self.clowder_user,
                        self.clowder_pass,
                        self.clowderspace,
                        season_name,
                        experiment_name,
                        plot_display_name,
                        timestamp[:4],
                        timestamp[5:7],
                        timestamp[8:10],
                        leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(
                        timestamp,
                        plot=plotname,
                        subsensor=sensor_name,
                        filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file)
                                                      or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path,
                                    tuples,
                                    out_path=out_file,
                                    compress=True)

                        found_in_dest = check_file_in_dataset(
                            connector,
                            host,
                            secret_key,
                            target_dsid,
                            merged_out,
                            remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host,
                                                       secret_key, target_dsid,
                                                       merged_out)
                            uploaded_file_ids.append(
                                host + ("" if host.endswith("/") else "/") +
                                "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file),
                                                  target_scan + "_merged.las")
                        merged_txt = merged_out.replace(
                            ".las", "_contents.txt")

                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check if this source file has already been merged
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path,
                                     tuples,
                                     out_path=out_file,
                                     merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path + "\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(
                            connector,
                            host,
                            secret_key,
                            target_dsid,
                            out_file,
                            remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host,
                                                       secret_key, target_dsid,
                                                       out_file)
                            uploaded_file_ids.append(
                                host + ("" if host.endswith("/") else "/") +
                                "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(
                            connector,
                            host,
                            secret_key,
                            target_dsid,
                            merged_out,
                            remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host,
                                                       secret_key, target_dsid,
                                                       merged_out)
                            uploaded_file_ids.append(
                                host + ("" if host.endswith("/") else "/") +
                                "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key,
                                          target_dsid,
                                          "terra.3dscanner.las2height")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info,
                                      resource['id'],
                                      {"files_created": uploaded_file_ids},
                                      'dataset')
        self.log_info(resource,
                      "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'],
                        self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'],
                        extractor_md)

        self.end_message(resource)
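
The LAS branch above avoids re-merging the same source file by keeping a sidecar "_contents.txt" that lists everything already folded into the merged output. A small sketch of that bookkeeping pattern, independent of the LAS clipping itself:

import os

def already_merged(merged_txt, file_path):
    # True if this source path is already recorded in the sidecar contents file
    if not os.path.exists(merged_txt):
        return False
    with open(merged_txt, 'r') as contents:
        return any(line.strip() == file_path for line in contents)

def record_merge(merged_txt, file_path):
    # Append the source path so later runs skip it
    with open(merged_txt, 'a') as contents:
        contents.write(file_path + "\n")
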
Example #7
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # clean tmp directory from any potential failed previous runs
        flist = os.listdir("/tmp")
        for f in flist:
            try:
                os.remove(os.path.join("/tmp", f))
            except:
                pass
        """ if file is above configured limit, skip it
		max_gb = 24 # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
		for fname in resource['local_paths']:
			if fname.endswith('raw'): rawfile = fname
		rawsize = os.stat(rawfile).st_size
		if rawsize > max_gb * 1000000000:
			self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
			return False
		"""

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_rawname = 'SWIR'
            sensor_fullname = 'swir_netcdf'
            soil_mask = None
        else:
            sensor_rawname = 'VNIR'
            sensor_fullname = 'vnir_netcdf'
            # Check for corresponding soil mask to include in workflow.sh if available
            soil_mask = self.sensors.get_sensor_path(timestamp,
                                                     sensor='vnir_soil_masks',
                                                     opts=['soil_mask'])
        out_nc = self.sensors.create_sensor_path(timestamp,
                                                 sensor=sensor_fullname)
        xps_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['xps'])
        ind_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['ind'])
        csv_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname.replace(
                                                    "_netcdf", "_traits"))

        raw_file, terra_md_full = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
            elif fname.endswith('raw'):
                raw_file = fname
        if None in [raw_file, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_rawname, terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        print_name = self.sensors.get_display_name(sensor=sensor_fullname)
        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
            (season_name, experiment_name, print_name, timestamp[:4],
             timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            print_name,
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
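        # target_dsid is the leaf dataset (season / experiment / sensor /
        # YYYY / MM / DD) that the calibrated output below is uploaded into.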
        uploaded_file_ids = []

        # Perform actual processing
        if (not file_exists(out_nc)) or self.overwrite:
            """TODO: OLD AND NOT USED
			self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
			if soil_mask and file_exists(soil_mask):
				# If soil mask exists, we can generate an _ind indices file
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
										  "-m", soil_mask, "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			else:
				# Otherwise we cannot, and need to trigger soilmask extractor and circle back later
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
											  "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			if returncode != 0:
				raise ValueError('script encountered an error')
			"""

            self.log_info(resource,
                          'invoking python calibration to create: %s' % out_nc)
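            # Two-step Python calibration: create an empty netCDF container
            # from the raw hyperspectral file, then write calibrated values
            # into it with apply_calibration().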
            create_empty_netCDF(raw_file, out_nc)
            self.log_info(resource, 'applying calibration to: %s' % out_nc)
            apply_calibration(raw_file, out_nc)
            self.log_info(resource,
                          '...done applying calibration to: %s' % raw_file)

            found_in_dest = check_file_in_dataset(connector,
                                                  host,
                                                  secret_key,
                                                  target_dsid,
                                                  out_nc,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, out_nc)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(out_nc)

            # TODO: Still compatible?
            # if not soil_mask:
            #     self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
            #     submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

            # TODO: Send output to BETYdb
            """
            # Send indices to betyDB
            if file_exists(ind_file):
                # TODO: Use ncks to trim ind_file to plots before this step
                plot_no = 'Full Field'

                with Dataset(ind_file, "r") as netCDF_handle:
                    ndvi = netCDF_handle.get_variables_by_attributes(standard_name='normalized_difference_chlorophyll_index_750_705')
                    NDVI705 = ndvi[0].getValue().ravel()[0]

                    # TODO: Map the remaining ~50 variables in BETY to create indices file
                    # TODO: In netCDF header,

                    csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                                 'citation_author,citation_year,citation_title,method'
                    csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                               'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                                   timestamp, NDVI705, plot_no)
                    with open(csv_file, 'w') as c:
                        c.write(csv_header+'\n'+csv_vals)

                # TODO: Send this CSV to betydb & geostreams extractors instead
                submit_traits(csv_file, bety_key=self.bety_key)
            """

        self.end_message(resource)
def upload_dataset(dataset_path, level, product, timestamp, sess, logfile):
    contents = os.listdir(dataset_path)
    if len(contents) == 0:
        logfile.write('%s,%s,"%s",%s\n' %
                      (level, product, dataset_path, "ERR: No files found"))
        return False

    # Find and prepare the metadata
    clean_md = None
    if product == "scanner3DTop" and level == "Level_1":
        # Special check between Level_1/raw_data for scanner3DTop only
        path3d = dataset_path.replace("Level_1", "raw_data")
        contents3d = os.listdir(path3d)
        for f in contents3d:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(path3d, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." %
                          os.path.join(path3d, f))
    else:
        for f in contents:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(dataset_path, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." %
                          os.path.join(dataset_path, f))
            elif f.endswith("_metadata_cleaned.json"):
                clean_md = load_json_file(os.path.join(dataset_path, f))
                if dry_run:
                    print("...%s successfully loaded." %
                          os.path.join(dataset_path, f))

    if clean_md is None and product != "EnvironmentLogger":
        logfile.write('%s,%s,"%s",%s\n' %
                      (level, product, dataset_path, "ERR: No metadata found"))
        return False

    # Create the dataset in Clowder
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, product, clean_md)
    YYYY = timestamp[:4]
    MM = timestamp[5:7]
    DD = timestamp[8:10]
    dataset_name = "%s - %s" % (product, timestamp)
    if not dry_run:
        dsid = build_dataset_hierarchy_crawl(clowder_host, clowder_admin_key,
                                             clowder_user, clowder_pass,
                                             clowder_space, season_name,
                                             experiment_name, product, YYYY,
                                             MM, DD, dataset_name)
    else:
        dsid = "JustPretend"
    logfile.write('%s,%s,"%s",%s\n' %
                  (level, product, dataset_path, "OK: %s" % dsid))

    # Upload metadata
    if not dry_run and product != "EnvironmentLogger":
        sess.post(
            "%sapi/datasets/%s/metadata.jsonld" % (clowder_host, dsid),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                "@context": [
                    "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                    {
                        "@vocab":
                        "https://terraref.ncsa.illinois.edu/metadata/uamac#"
                    }
                ],
                "content":
                clean_md,
                "agent": {
                    "@type":
                    "cat:user",
                    "user_id":
                    "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" %
                    clowder_userid
                }
            }))

    # Add each file
    for f in contents:
        if not (f.endswith("_metadata.json")
                or f.endswith("_metadata_cleaned.json")):
            filepath = os.path.join(dataset_path, f)
            if not dry_run:
                upload_to_dataset(conn, clowder_host, clowder_user,
                                  clowder_pass, dsid, filepath)
            else:
                print("...would upload %s" % f)

    return True
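

# A minimal driver sketch (not part of the original example) showing how
# upload_dataset() above might be invoked. It assumes the module-level globals
# used by that function (dry_run, clowder_host, clowder_admin_key, clowder_user,
# clowder_pass, clowder_space, clowder_userid, conn) are already configured;
# the dataset path, product name, and timestamp below are purely illustrative.
def _example_upload_run():
    import requests

    sess = requests.Session()
    sess.auth = (clowder_user, clowder_pass)

    # Timestamp must start with "YYYY-MM-DD" so the YYYY/MM/DD slices above work
    timestamp = "2017-06-01__10-00-00-000"
    dataset_path = "/sites/ua-mac/Level_1/scanner3DTop/2017-06-01/" + timestamp

    with open("upload_log.csv", "w") as logfile:
        ok = upload_dataset(dataset_path, "Level_1", "scanner3DTop",
                            timestamp, sess, logfile)
        print("upload %s" % ("succeeded" if ok else "failed"))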