def get_last_response(self, ip, full=False):
    """Get QI metadata response from previous job.

    :param str ip: image product
    :param bool full: True for full data otherwise only relevant part

    :return dict: QI metadata (None when no previous job exists)
    """
    try:
        last_job = Logger.db_handler().last_job_id(
            self.config['processors'][0])
    except KeyError:
        raise ProcessorCriticalError(self, "No processors defined in config")

    if not last_job:
        # no previous job recorded -> nothing to read
        Logger.debug("First run? Unable to get last response from JSON file")
        return None

    json_file = os.path.join(
        self.config['logging']['directory'],
        '{0:05d}'.format(last_job),
        ip + '.json')
    if not os.path.exists(json_file):
        raise ProcessorCriticalError(
            self, "Response file {} not found".format(json_file))

    data = JsonIO.read(json_file, response=True)
    if full:
        # caller asked for the complete stored response
        return data

    relevant_part = QCResponse(data).get(self.isMeasurementOf)
    if not relevant_part:
        if self.config['strict']['enabled']:
            raise ProcessorCriticalError(
                self, "Unable to get relevant part for {} ({})".format(
                    self.isMeasurementOf, ip))
        return {}

    if hasattr(self, "isMeasurementOfSection"):
        # keep only the keys relevant for this processor's section
        wanted = ("isMeasurementOf", "value", "lineage",
                  self.isMeasurementOfSection)
        relevant_part = {key: val for key, val in relevant_part.items()
                         if key in wanted}

    return relevant_part
def _get_qi_results_path(self, ip): """Get IP specific QI results path. :param str ip: image product :return str: output path """ # no output path defined, assuming QI results output_path = os.path.join( self.config['project']['path'], self.config['project']['downpath'], ip + self.data_dir_suf, ) if not os.path.exists(output_path): # no output directory defined Logger.debug("Output path {} does not exist".format(output_path)) return None dirs = os.listdir(output_path) if 'GRANULE' in dirs: # only one directory is expected here (sentinel-2) dirs = os.listdir(os.path.join(output_path, 'GRANULE')) if len(dirs) != 1: raise ProcessorCriticalError( "Unexpected number of data sub-directories") return os.path.join(output_path, 'GRANULE', dirs[0], 'QI_DATA', 'QCMMS') return os.path.join(output_path, 'QI_DATA', 'QCMMS')
def update_response(self, response_data):
    """Update current IP QI metadata response.

    Also enforces metadata/response consistency: a False value forces the
    rejected status, and a rejected/failed status forces a False value.

    :param dict response_data: key value pairs to update

    :raises ProcessorCriticalError: when the response counter is inconsistent
    """
    if response_data.get('lineage') is None:
        # set default lineage if not defined
        response_data["lineage"] = self.get_lineage()
    Logger.debug("Updating response {} on idx {}".format(
        self.isMeasurementOf, self._current_response_idx))
    try:
        response = self._response[self._current_response_idx]
    except (IndexError, KeyError):
        raise ProcessorCriticalError(
            self,
            "Inconsistence response counter: index ({}) length ({})".format(
                self._current_response_idx, len(self._response)))
    response.update(response_data, self.isMeasurementOf)

    # check metadata-response consistency
    ## 1. value == False -> rejected
    if response.get_value(self.isMeasurementOf) is False and \
       self.get_response_status() not in (DbIpOperationStatus.rejected,
                                          DbIpOperationStatus.failed):
        # fixed typo: "incosistency" -> "inconsistency" (now consistent
        # with the second warning message below)
        Logger.warning(
            "Value response status inconsistency: setting status from {} to rejected"
            .format(self.get_response_status()))
        # value is False -> switch to rejected status
        self.set_response_status(DbIpOperationStatus.rejected)
    ## 2. rejected | failed -> value = False
    if response.status in (DbIpOperationStatus.rejected,
                           DbIpOperationStatus.failed) and \
       response.get_value(self.isMeasurementOf) is not False:
        Logger.warning(
            "Value response status inconsistency: setting value to False")
        response.set_value(self.isMeasurementOf, False)
def compute_value_count(filename):
    """Compute value count for input raster file.

    Raise ProcessorCriticalError on failure.

    :param str filename: raster filename

    :return tuple: value, count, ncells
    """
    from osgeo import gdal, gdalconst
    import numpy as np

    ds = None
    try:
        ds = gdal.Open(filename, gdalconst.GA_ReadOnly)
        band = ds.GetRasterBand(1)
        array = np.array(band.ReadAsArray())
        value, count = np.unique(array, return_counts=True)
        ncells = band.XSize * band.YSize
        if sum(count) != ncells:
            # fixed: this is a module-level function, `self` was undefined
            # here and raised NameError instead of the intended exception;
            # pass None as the processor context
            # NOTE(review): confirm ProcessorFailedError accepts None
            raise ProcessorFailedError(
                None, "File {}: cell number mismatch".format(filename))
    except RuntimeError as e:
        # fixed: same undefined `self` defect on this failure path
        raise ProcessorCriticalError(
            None, "Computing value/count statistics failed: {}".format(e))
    finally:
        # release the GDAL dataset on every path (was leaked on failure)
        ds = None

    return value, count, ncells
def delete_qi_results(self, directory):
    """Try to delete QI results produced by processor.

    On strict mode raise error when QI results are inconsistent.

    :param str directory: directory where to search for QI results

    :raises ProcessorCriticalError: in strict mode on QI results mismatch
    """
    # fixed: check_qi_results is a method (called as self.check_qi_results
    # elsewhere in this file); the bare name raised NameError here
    if self.config['strict']['enabled'] and \
       not self.check_qi_results(directory):
        raise ProcessorCriticalError(self, "QI results mismatch detected")

    for filepath in self.get_qi_results(directory):
        os.remove(filepath)
def get_response_status(self):
    """Get status of current IP response.

    :return DbIpOperationStatus status: IP status
    """
    idx = self._current_response_idx
    try:
        current = self._response[idx]
    except (IndexError, KeyError):
        raise ProcessorCriticalError(
            self,
            "Configuration inconsistency - current response index {} "
            "vs. number of responses {}".format(idx, len(self._response)))
    return current.status
def set_response_status(self, status):
    """Set status for current IP response.

    :param DbIpOperationStatus status: status to be set
    """
    idx = self._current_response_idx
    try:
        self._response[idx].status = status
    except (IndexError, KeyError):
        raise ProcessorCriticalError(
            self,
            "Configuration inconsistency - current response index {} "
            "vs. number of responses {}".format(idx, len(self._response)))
def previous(self):
    """Get previous processor name defined in the queue.

    :return str: processor identifier
    """
    processors = self.config['processors']
    # list.index raises ValueError when absent; guard with membership test
    idx = processors.index(self.identifier) if self.identifier in processors \
        else -1
    if idx < 1:
        # either not found or this processor is first in the queue
        raise ProcessorCriticalError(self, "no previous processor defined")
    return processors[idx - 1]
def set_platform_type(self, ptype):
    """Set current platform type.

    :param str ptype: QCPlatformType string ('primary' or 'supplementary')
    """
    mapping = {
        'primary': QCPlatformType.Primary,
        'supplementary': QCPlatformType.Supplementary,
    }
    if ptype not in mapping:
        raise ProcessorCriticalError(
            self, "Unsupported platform type: {}".format(ptype))
    self.platform_type = mapping[ptype]
def delete_path(self, path):
    """Try to delete a path object (file or directory).

    On strict mode raise error when object does not exist.

    :param str path: object to delete
    """
    # files go through os.remove, anything else through shutil.rmtree
    remover = os.remove if os.path.isfile(path) else shutil.rmtree
    try:
        remover(path)
    except FileNotFoundError as e:
        # raise critical error only in strict mode
        if self.config['strict']['enabled']:
            raise ProcessorCriticalError(self, '{}'.format(e))
def _run(self, meta_data, data_dir, output_dir):
    """Perform processor tasks.

    Runs a local co-registration (arosics COREG_LOCAL) of the target image
    against a configured reference image, optionally corrects the detected
    shifts, exports tie points as GML and computes QI statistics.

    :param meta_data: IP metadata
    :param str data_dir: Path to data directory
    :param str output_dir: path to output processor directory

    :return dict: QI metadata (None on early failure before coreg starts)
    """
    import arosics

    # skeleton of the QI response for the geometry detailed-control section
    response_data = {
        self.isMeasurementOfSection: [{
            "id": "http://qcmms.esa.int/detailed_control#GEOMETRY",
            "generatedAtTime": datetime.now(),
            "requirement": False
        }]
    }

    # reference image must be defined
    try:
        im_reference = self.config.abs_path(
            self.config['geometry']['reference_image'])
    except KeyError:
        Logger.error("Reference image not defined")
        self.set_response_status(DbIpOperationStatus.failed)
        return
    if not os.path.exists(im_reference):
        Logger.error("Reference image '{}' not found".format(im_reference))
        self.set_response_status(DbIpOperationStatus.failed)
        return

    lh_output_dir = self.get_lh_dir(data_dir)
    # NOTE(review): local 'bands2stack' (band index for matching) shadows
    # the method self.bands2stack() used below — confirm both are intended
    bands2stack = self.config['geometry'].get('band4match', 3)
    resolution = self.config['land_product'].get('geometric_resolution')
    correct_shifts = self.config['geometry'].get('correct_shifts', False)

    # stacking: only possible when the harmonization_stack processor ran
    has_stack = 'harmonization_stack' in self.config['processors']
    try:
        stack_on = self.config['geometry']['stack_on']
        if stack_on is True and has_stack is False:
            Logger.warning("Harmonization stack not available")
            stack_on = False
    except KeyError:
        # no explicit config -> follow availability of the stack
        stack_on = has_stack

    # resolve target image: either the harmonization stack or a single band
    try:
        if stack_on:
            im_target = self._get_target_stack(lh_output_dir)
        else:
            # L2A products carry the resolution in the filename suffix
            filepattern_suff = '_{}m{}'.format(
                resolution, self.img_extension) \
                if 'L2A' in data_dir else '{}'.format(self.img_extension)
            filepattern = '{}{}'.format(
                self.bands2stack(bands2stack, data_dir), filepattern_suff)
            try:
                im_target = self.filter_files(data_dir, filepattern)[0]
            except IndexError:
                raise ProcessorCriticalError(
                    self, "Pattern '{}' not found in {}".format(
                        filepattern, data_dir))
        Logger.debug("Target image: {}".format(im_target))
    except ProcessorFailedError:
        self.set_response_status(DbIpOperationStatus.failed)
        return

    # check EPSG codes (coreg requires matching projections)
    if self.getEpsgCode(im_target) != \
       self.getEpsgCode(im_reference):
        Logger.error("Inconsistent reference and image EPSG codes")
        self.set_response_status(DbIpOperationStatus.rejected)
        return

    # build a bad-data mask for the matching
    # NOTE(review): the temp mask is only removed on the success path at the
    # end of this method; early returns below leave it behind
    try:
        mask = tempfile.mktemp(prefix='mask_')
        self.create_mask(data_dir, mask, resolution)
    except ProcessorFailedError:
        self.set_response_status(DbIpOperationStatus.failed)
        return

    # arocics settings
    coreg_file_prefix = ''
    if correct_shifts is True:
        coreg_file_prefix += 'coreg_'
    if stack_on:
        coreg_file_prefix += 'stack_'
    aro_raster_path = os.path.join(
        lh_output_dir,
        '{}{}.tif'.format(coreg_file_prefix,
                          os.path.split(lh_output_dir)[-1]))
    kwargs = {
        'fmt_out': 'GTIFF',
        'grid_res': 30,
        'max_points': 1000,
        'mask_baddata_tgt': mask,
        'path_out': aro_raster_path,
        'projectDir': lh_output_dir,
        'nodata': self.get_nodata_values(),
        'window_size': (128, 128)
    }
    if stack_on:
        # when matching against a stack, select the band to match on
        kwargs['s_b4match'] = bands2stack

    # run arocics
    Logger.debug(
        "Running coreg for reference: {}; image: {} with args {}".format(
            im_reference, im_target, kwargs))
    try:
        CRL = arosics.COREG_LOCAL(im_reference, im_target, **kwargs)
    except AssertionError as e:
        Logger.error("Coreg failed: {}".format(e))
        self.set_response_status(DbIpOperationStatus.failed)
        return response_data

    # tie-point grid may be empty -> ValueError
    try:
        CRL_points_table = CRL.tiepoint_grid.get_CoRegPoints_table()
    except ValueError:
        CRL_points_table = None
    # export tie points (before shift correction) as GML
    try:
        self.to_PointFormat(
            CRL_points_table,
            self._result['qi.files']['gml_before_correction'],
            CRL.outFillVal,
            epsg=self.getEpsgCode(im_target))
    except ProcessorRejectedError:
        return response_data

    if correct_shifts is True:
        # apply detected shifts and re-run coreg to measure the residuals
        CRL.correct_shifts()
        CRL_after_corr = arosics.COREG_LOCAL(im_reference, CRL.path_out,
                                             **kwargs)
        try:
            CRL_points_table_after_corr = \
                CRL_after_corr.tiepoint_grid.get_CoRegPoints_table()
        except ValueError:
            CRL_points_table_after_corr = None
        try:
            self.to_PointFormat(
                CRL_points_table_after_corr,
                self._result['qi.files']['gml_after_correction'],
                CRL_after_corr.outFillVal,
                epsg=self.getEpsgCode(im_target))
        except ProcessorRejectedError:
            # fall back to the pre-correction GML so both files exist
            Logger.debug(
                'No GCP points found in the GML after correction. Copying '
                'the one before the correction as the one after.')
            from shutil import copy2
            copy2(self._result['qi.files']['gml_before_correction'],
                  self._result['qi.files']['gml_after_correction'])
    else:
        CRL_after_corr = None

    if self.visualize is True or self.save_visualization is True:
        self._visualize(CRL, CRL_after_corr, self.visualize,
                        self.save_visualization, correct_shifts,
                        output_dir, resolution, aro_raster_path, stack_on)

    # update response attributes
    pixel_metadata_coding_conf = self.config['pixel_metadata_coding']
    response_data[self.isMeasurementOfSection][0].update({
        'mask': self.file_basename(
            self._result['qi.files']['gml_before_correction']),
        'rasterCoding': pixel_metadata_coding_conf[self.identifier],
        'lineage': 'http://qcmms.esa.int/Arosics_v{}'.format(
            arosics.__version__)
    })

    # compute tie-point statistics for the QI response
    try:
        stats = self.compute_stats(CRL_points_table, resolution,
                                   CRL.outFillVal)
        response_data[self.isMeasurementOfSection][0].update(stats)
    except ProcessorFailedError:
        return response_data

    # clean up the temporary mask (success path only)
    os.remove(mask)

    return response_data
def run(self):
    """Run processor tasks.

    Iterates over image products processed by the previous processor in the
    queue, decides per IP whether to (re)compute or reuse the previous
    response, and logs each IP operation.

    :return int: response counter value
    """
    # log start computation
    self._run_start()

    # loop through image products (IP)
    processor_previous = self.previous()
    processed_ips = Logger.db_handler().processed_ips(
        processor_previous, platform_type=self.platform_type)

    ip_count = len(processed_ips)
    if ip_count < 1:
        Logger.warning(
            "No IP products to process (previous processor: {})".format(
                processor_previous))

    counter = 1
    for ip, status in processed_ips:
        # increment counter
        self._current_response_idx += 1
        Logger.info("({}) Processing {}... ({}/{})".format(
            self.identifier, ip, counter, ip_count))
        counter += 1

        # get last IP status
        ip_status = self.get_last_ip_status(ip, status)

        # skip rejected IP (QA not passed); still propagate the previous
        # response so the rejection is reflected in this job's output
        if ip_status == DbIpOperationStatus.rejected:
            self.ip_operation(ip, ip_status)
            response_data = self.get_last_response(ip)
            if response_data:
                self.update_response(response_data)
            continue

        # set current response status from DB
        self.set_response_status(status)

        # read metadata
        meta_data = self.get_meta_data(ip)

        # define output path
        # check whether results exists
        if self.output_path is None:
            # output path not defined, assuming QI results (level2)
            try:
                output_path = self._get_qi_results_path(
                    self.get_processing_level2(meta_data)['title'])
            except TypeError:
                # get_processing_level2 returned None -> no level2 product
                Logger.warning(
                    "Level2 product not found, switching back to level1!")
                output_path = self._get_qi_results_path(meta_data['title'])
            results_exist = self.check_qi_results(output_path)
        else:
            output_path = self.output_path
            results_exist = os.path.exists(self._get_ip_output_path(ip))

        # force absolute path
        try:
            output_path = os.path.abspath(output_path)
        except TypeError:
            # output_path is None -> cannot continue
            raise ProcessorCriticalError(self,
                                         "Output directory not defined!")

        # determine whether to force the computation
        # ip_status is None -> no previous processor run detected
        # (note: 'and' binds tighter than 'or' in the expression below)
        force = status == DbIpOperationStatus.forced or \
            ip_status is None or \
            status == DbIpOperationStatus.unchanged and not results_exist

        # perform processor operations if requested
        if status in (DbIpOperationStatus.added,
                      DbIpOperationStatus.updated,
                      DbIpOperationStatus.failed) or force:
            if force:
                # change status from unchanged to updated
                if not results_exist:
                    Logger.debug("Missing results")
                Logger.debug("Operation forced")

            # create processor result directory if not exists
            if output_path and not os.path.exists(output_path):
                os.makedirs(output_path)

            # run processor computation if requested
            down_path = self.get_data_dir()
            if self.level2_data:
                try:
                    ip_dd = self.get_processing_level2(meta_data)['title']
                except TypeError:
                    # switch back to L1
                    ip_dd = ip
            else:
                ip_dd = ip
            data_dir = os.path.join(
                down_path, '{}{}'.format(ip_dd, self.data_dir_suf))
            Logger.debug("Data dir: {}".format(data_dir))
            Logger.debug("Output dir: {}".format(output_path))

            # run computation
            response_data = self._run(meta_data, data_dir, output_path)
        else:
            # no change, get response data from previous run
            response_data = self.get_last_response(ip)

        # update response
        if response_data:
            self.update_response(response_data)

        # log IP operation
        self.ip_operation(
            ip, self._response[self._current_response_idx].status)

    # log computation finished
    self._run_done()

    return self._current_response_idx
def _ordinary_control(self, filepath, level=1):
    """Perform ordinary control.

    Unarchives the product if needed and checks band completeness,
    readability, raster properties and metadata, filling the QI response.

    :param str filepath: path to zip file
    :param int level: level number for response (1 or 2)

    :return dict: QI response data

    :raises ProcessorCriticalError: on unsupported level
    """
    # NOTE(review): gdal/osr appear unused directly here; kept in case the
    # import is relied on for GDAL driver registration side effects
    from osgeo import gdal, osr

    if level not in (1, 2):
        raise ProcessorCriticalError(self,
                                     "Unsupported level: {}".format(level))

    response_data = {'value': False}

    # 1. unarchive product
    # fixed: str.rstrip strips a *character set*, not a suffix, so it could
    # also eat trailing name characters that happen to be in the extension
    basename = os.path.basename(filepath)
    if basename.endswith(self.extension):
        basename = basename[:-len(self.extension)]
    dirname = os.path.join(
        os.path.dirname(filepath), basename + self.data_dir_suf)
    try:
        if not os.path.exists(filepath) and \
           not os.path.exists(dirname):
            raise ProcessorRejectedError(
                self,
                "No input data: {}/{} not found".format(filepath, dirname))
        if not os.path.exists(dirname):
            Logger.info("Unarchiving {}...".format(filepath))
            dirname = self.unarchive(filepath, dirname)
    except ProcessorRejectedError:
        return response_data

    # 2. check if all bands are available
    try:
        img_files, band_res = self.check_bands(dirname, level)
    except ProcessorRejectedError:
        return response_data

    level_key = 'level{}'.format(level)
    response_data[level_key] = {}
    response_data[level_key]['rastersComplete'] = True
    response_data[level_key]['channels'] = len(img_files)
    response_data[level_key]['lineage'] = self.get_lineage_level(filepath)

    # 3. check if raster file is readable
    try:
        self.check_bands_read(img_files)
    except ProcessorRejectedError:
        return response_data
    Logger.info("All imagery files found and readable")
    # fixed: dropped the no-op ``.format(level)`` on a placeholder-free key
    response_data[level_key]['rastersRead'] = True

    # 4. read raster characteristics
    try:
        epsg_res, format_res = self.check_bands_properties(
            img_files, band_res)
        self.check_epsg_tile_name(epsg_res, filepath)
    except ProcessorRejectedError:
        return response_data

    # update response (epsg, format, bands)
    response_data[level_key]['epsg'] = int(epsg_res)
    if format_res == 'JP2OpenJPEG':
        data_format = 'JPEG'
    elif format_res == 'GTiff':
        data_format = 'TIFF'
    else:
        data_format = '?'
    response_data[level_key]['format'] = data_format
    response_data[level_key]['bands'] = band_res

    # 5. check metadata
    metadata_read = self.check_metadata(dirname, level)
    response_data[level_key]['metadataRead'] = metadata_read[0]
    if self.has_calibration_metadata:
        response_data[level_key]['calibrationMetadata'] = metadata_read[1]
    else:
        # platform has no calibration metadata -> nothing to check
        response_data[level_key]['calibrationMetadata'] = True

    # 6. selected for next control
    qi_failed = []
    for attr in ('rastersComplete', 'rastersRead',
                 'metadataRead', 'calibrationMetadata'):
        if not response_data[level_key][attr]:
            qi_failed.append(attr)
    response_data['value'] = len(qi_failed) < 1
    if qi_failed:
        Logger.error("Rejected because of {}".format(','.join(qi_failed)))
        self.set_response_status(DbIpOperationStatus.rejected)

    return response_data