def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.bin', '_right.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if file_exists(left_tiff) and file_exists(right_tiff):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return CheckMessage.ignore
Example #2
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if len(resource['files']) >= 23:
            md = download_metadata(connector, host, secret_key, resource['id'])
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                timestamp = resource['name'].split(" - ")[1]
                out_fullday_netcdf = self.sensors.create_sensor_path(timestamp)
                out_fullday_csv = out_fullday_netcdf.replace(".nc", "_geo.csv")
                if file_exists(out_fullday_netcdf) and file_exists(
                        out_fullday_csv):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            return CheckMessage.download
        else:
            self.log_skip(resource, "found less than 23 files")
            return CheckMessage.ignore
Example #3
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for an _ir.bin file before beginning processing
        if not contains_required_files(resource, ['_ir.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                tif = self.sensors.get_sensor_path(timestamp)
                png = tif.replace(".tif", ".png")
                if file_exists(png) and file_exists(tif):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.force:
                # Check NRMAC score > 15 before proceeding if available
                nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
                if not (nrmac_md and 'left_quality_score' in nrmac_md):
                    self.log_skip(resource,
                                  "NRMAC quality score not available")
                    return CheckMessage.ignore
                elif float(nrmac_md['left_quality_score']) > self.threshold:
                    self.log_skip(
                        resource,
                        "NRMAC quality score %s is above threshold of %s" %
                        (float(
                            nrmac_md['left_quality_score']), self.threshold))
                    return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                                opts=['left'])
                right_enh_tiff = self.sensors.create_sensor_path(
                    timestamp, opts=['right'])
                if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                    if contains_required_files(resource, [
                            os.path.basename(left_enh_tiff),
                            os.path.basename(right_enh_tiff)
                    ]):
                        self.log_skip(
                            resource,
                            "metadata v%s and outputs already exist" %
                            self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(
                            resource,
                            "output files exist but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #5
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if not contains_required_files(
                resource,
            ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_fullname = 'swir_netcdf'
        else:
            sensor_fullname = 'vnir_netcdf'

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                out_nc = self.sensors.get_sensor_path(timestamp,
                                                      sensor=sensor_fullname)
                if file_exists(out_nc):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            # See if we can recover it from disk
            if sensor_fullname == 'vnir_netcdf':
                date = timestamp.split("__")[0]
                source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
                    date, timestamp)
                for f in os.listdir(source_dir):
                    if f.endswith("_metadata.json"):
                        self.log_info(resource,
                                      "updating metadata from %s" % f)
                        raw_dsmd = load_json_file(os.path.join(source_dir, f))
                        clean_md = clean_metadata(raw_dsmd, 'VNIR')
                        complete_md = build_metadata(host, self.extractor_info,
                                                     resource['id'], clean_md,
                                                     'dataset')
                        remove_metadata(connector, host, secret_key,
                                        resource['id'])
                        upload_metadata(connector, host, secret_key,
                                        resource['id'], complete_md)
                        return CheckMessage.download
            return CheckMessage.ignore
Example #6
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download
        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        found_left = False
        found_right = False
        for f in resource['files']:
            if 'filename' in f:
                if f['filename'].endswith('_left.bin'):
                    found_left = True
                elif f['filename'].endswith('_right.bin'):
                    found_right = True
        if not (found_left and found_right):
            self.log_skip(
                resource,
                "found left: %s, right: %s" % (found_left, found_right))
            return CheckMessage.ignore

        # Check if outputs already exist unless overwrite is forced - skip if found
        if not self.overwrite:
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            lbase = self.sensors.get_sensor_path(timestamp,
                                                 opts=['left'],
                                                 ext='')
            rbase = self.sensors.get_sensor_path(timestamp,
                                                 opts=['right'],
                                                 ext='')
            out_dir = os.path.dirname(lbase)
            if (os.path.isfile(lbase + 'tif')
                    and os.path.isfile(rbase + 'tif')):
                self.log_skip(resource, "outputs found in %s" % out_dir)
                return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(
                md, self.extractor_info['name']) and not self.overwrite:
            self.log_skip("metadata indicates it was already processed")
            return CheckMessage.ignore
        if get_terraref_metadata(md):
            return CheckMessage.download
        else:
            self.log_skip("no terraref metadata found")
            return CheckMessage.ignore
	def check_message(self, connector, host, secret_key, resource, parameters):
		if not is_latest_file(resource):
			return CheckMessage.ignore

		# Check for expected input files before beginning processing
		if len(get_all_files(resource)) >= 23:
			md = download_metadata(connector, host, secret_key, resource['id'])
			if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
				self.log_skip(resource, "metadata v%s already exists" % self.extractor_info['version'])
				return CheckMessage.ignore
			return CheckMessage.download
		else:
			self.log_skip(resource, 'not all input files are ready')
			return CheckMessage.ignore
    def check_message(self, connector, host, secret_key, resource, parameters):
        # Check for 0000-0101 bin files before beginning processing
        if len(resource['files']) < 102:
            return CheckMessage.ignore
        if not is_latest_file(resource):
            return CheckMessage.ignore

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.get_sensor_path(timestamp,
                                                 opts=['combined_hist'])
        coloredImg_path = self.sensors.get_sensor_path(
            timestamp, opts=['combined_pseudocolored'])

        # Count number of bin files in dataset, as well as number of existing outputs
        ind_add = 0
        ind_output = 0
        for ind in range(0, 102):
            format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
            for f in resource['files']:
                if f['filename'].endswith(format_ind + '.bin'):
                    ind_add += 1
                    out_png = self.sensors.get_sensor_path(timestamp,
                                                           opts=[format_ind])
                    if os.path.exists(out_png) and not self.overwrite:
                        ind_output += 1
                    break

        # Do the outputs already exist?
        if ind_output == 102 and os.path.exists(hist_path) and os.path.exists(
                coloredImg_path):
            logging.info("skipping dataset %s, outputs already exist" %
                         resource['id'])
            return CheckMessage.ignore
        # Do we have too few input BIN files?
        if ind_add < 102:
            return CheckMessage.ignore

        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(
                md, self.extractor_info['name']) and not self.overwrite:
            logging.info("skipping dataset %s, found existing metadata" %
                         resource['id'])
            return CheckMessage.ignore

        if get_terraref_metadata(md):
            return CheckMessage.download

        return CheckMessage.ignore
Example #9
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            return CheckMessage.ignore

        # Check for an ir.BIN file and metadata before beginning processing
        found_ir = None
        found_md = None
        for f in resource['files']:
            if 'filename' in f and f['filename'].endswith('_ir.bin'):
                found_ir = f['filepath']
            elif 'filename' in f and f['filename'].endswith('_metadata.json'):
                found_md = f['filepath']

        if found_ir:
            # Check if outputs already exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            png_path = self.sensors.get_sensor_path(timestamp, ext='png')
            tiff_path = self.sensors.get_sensor_path(timestamp)

            if os.path.exists(png_path) and os.path.exists(
                    tiff_path) and not self.overwrite:
                logging.getLogger(__name__).info(
                    "skipping dataset %s, outputs already exist" %
                    resource['id'])
                return CheckMessage.ignore

            # If we don't find _metadata.json file, check if we have metadata attached to dataset instead
            if not found_md:
                md = download_metadata(connector, host, secret_key,
                                       resource['id'])
                if get_extractor_metadata(
                        md,
                        self.extractor_info['name']) and not self.overwrite:
                    logging.getLogger(__name__).info(
                        "skipping dataset %s, already processed" %
                        resource['id'])
                    return CheckMessage.ignore
                if get_terraref_metadata(md):
                    return CheckMessage.download
                return CheckMessage.ignore
            else:
                return CheckMessage.download
        return CheckMessage.ignore
Example #10
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            # Check for raw_data_source in metadata and resumbit to bin2tif if available...
            md = download_metadata(connector, host, secret_key, resource['id'])
            terra_md = get_terraref_metadata(md)
            if 'raw_data_source' in terra_md:
                raw_id = str(terra_md['raw_data_source'].split("/")[-1])
                self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
                submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_nrmac_tiff)) or (
                                not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                    if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                        self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(resource, "output file exists but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #11
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        # Check for 0000-0101 bin files before beginning processing
        if len(resource['files']) < 102:
            return CheckMessage.ignore
        if not is_latest_file(resource):
            return CheckMessage.ignore

        # Count number of bin files in dataset, as well as number of existing outputs
        ind_add = 0
        for ind in range(0, 102):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            for f in resource['files']:
                if f['filename'].endswith(format_ind+'.bin'):
                    ind_add += 1
                    break
		    
        # Do we have too few input BIN files?
        if ind_add < 102:
            return CheckMessage.ignore
        else:
            return CheckMessage.download
Example #12
0
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            # Check for a left and right TIF file - skip if not found
            # If we're only processing the left files, don't check for the right file
            needed_files = ['_left.tif']
            if not self.leftonly:
                needed_files.append('_right.tif')
            if not contains_required_files(resource, needed_files):
                self.log_skip(resource, "missing required files")
                return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_mask_tiff)) or \
                   (not (file_exists(left_mask_tiff) and file_exists(right_mask_tiff))):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % \
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
        # Check for other images to create a mask on
        elif not contains_required_files(resource, ['.tif']):
            self.log_skip(resource, "missing required tiff file")
            return CheckMessage.ignore

        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download