def file_basename(self, filepath):
    """Return file basename or catalog URL for the given file.

    :param str filepath: path to the file

    :return str: file basename or catalog URL
    """
    base_path = os.path.abspath(
        filepath)[len(os.path.abspath(self.config['project']['path'])) + 1:]
    tuc_name = self.config['catalog']['ip_parent_identifier'].split(':')[-1]

    # determine URL for catalog
    url = base_path
    if self.config.has_section('catalog') and \
       self.config['catalog'].get('response_url'):
        url = '{}/{}/{}'.format(
            self.config['catalog']['response_url'].rstrip('/'),
            tuc_name, base_path)

    # copy file to www directory if defined
    www_dir = self.config['catalog'].get('www_dir')
    if www_dir:
        target = os.path.join(www_dir, tuc_name, base_path.lstrip('/'))
        target_dir = os.path.dirname(target)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(filepath, target)
        Logger.debug("File {} copied to {}".format(filepath, target))

    return url
def check_bands(self, dirname, level):
    """Check raster bands.

    :param str dirname: image product directory
    :param int level: level to be checked (1, 2)

    :return tuple: image filenames, bands
    """
    img_files_all = self.filter_files(dirname, extension=self.img_extension)
    band_res = []
    img_files = []
    bands = self.get_bands(level)
    for band in bands:
        pattern = r'.*_{}.*{}'.format(band, self.img_extension)
        found = False
        for item in img_files_all:
            if not re.search(pattern, item):
                continue
            found = True
            Logger.debug("File {} found: {}".format(pattern, item))
            # B10 or B10_10m, ...
            band_res.append({'id': item[item.find(band):].split('.')[0]})
            img_files.append(item)
        if not found:
            raise ProcessorRejectedError(
                self, "{} not found in {}".format(pattern, dirname))

    return img_files, band_res
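# --- Illustrative sketch (not part of the processor code) ---
# A minimal standalone example of the band pattern matching used in
# check_bands() above; the band id, extension and filename below are
# hypothetical, not taken from a real product.
import re

img_extension = '.jp2'
pattern = r'.*_{}.*{}'.format('B04', img_extension)
item = 'T33UVR_20200601T100031_B04_10m.jp2'
if re.search(pattern, item):
    # band id including the resolution suffix: 'B04_10m'
    print(item[item.find('B04'):].split('.')[0])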
def _get_qi_results_path(self, ip):
    """Get IP-specific QI results path.

    :param str ip: image product

    :return str: output path
    """
    # no output path defined in config, assuming the default QI results
    # location
    output_path = os.path.join(
        self.config['project']['path'],
        self.config['project']['downpath'],
        ip + self.data_dir_suf,
    )
    if not os.path.exists(output_path):
        Logger.debug("Output path {} does not exist".format(output_path))
        return None

    dirs = os.listdir(output_path)
    if 'GRANULE' in dirs:
        # only one directory is expected here (Sentinel-2)
        dirs = os.listdir(os.path.join(output_path, 'GRANULE'))
        if len(dirs) != 1:
            raise ProcessorCriticalError(
                "Unexpected number of data sub-directories")
        return os.path.join(output_path, 'GRANULE', dirs[0],
                            'QI_DATA', 'QCMMS')

    return os.path.join(output_path, 'QI_DATA', 'QCMMS')
def __init__(self, username, password, archive, backup_archive=None):
    """Connect API.

    Raise ProcessorFailedError on failure.
    """
    from sentinelsat.sentinel import SentinelAPI, SentinelAPIError

    # remember settings for query()
    self.archive = archive
    self.backup_archive = backup_archive

    # connect API
    try:
        self.api = SentinelAPI(username, password, archive)
    except (SentinelAPIError, ConnectionError) as e:
        self.api = None
        # keep the exception for the error message ('e' is unbound once
        # a nested handler re-binds the same name)
        error = e
        if backup_archive:
            # re-try with backup archive
            Logger.error(
                "Unable to connect {} ({}). Re-trying with {}...".format(
                    archive, e, backup_archive))
            try:
                self.api = SentinelAPI(username, password, backup_archive)
            except (SentinelAPIError, ConnectionError) as e_backup:
                self.api = None
                error = e_backup
        if self.api is None:
            raise ProcessorFailedError(
                self, "Unable to connect: {}".format(error),
                set_status=False)

    Logger.debug("Sentinel API connected")
def set_processors(self, processors=None):
    """Set list of processors to be performed.

    :param list processors: list of processors to be registered
                            (if None, the configuration is used)
    """
    if not processors:
        try:
            processors = self.config['processors']
        except KeyError:
            raise ConfigError(self._config_files,
                              "list of processors not defined")
    else:
        # override configuration
        self.config['processors'] = processors

    if not processors:
        return

    for identifier in processors:
        found = False
        for processor in processors_list:
            if processor.identifier == identifier:
                Logger.debug("'{}' processor registered".format(
                    processor.identifier))
                self._processors.append(
                    processor(self.config, self.response))
                found = True
                break
        if not found:
            self.config.processor_not_found(identifier)
def create_stack(self, data_dir, output_dir, stack_name):
    """Create stack of all bands.

    :param str data_dir: directory with the Sentinel scene
    :param str output_dir: path to a directory where the stack will be saved
    :param str stack_name: stack filename
    """
    import rasterio

    paths_resampled = self._resample_bands(data_dir)

    with rasterio.open(paths_resampled[0]) as band1:
        meta = band1.meta
        if meta['driver'] != 'GTiff':
            meta['driver'] = 'GTiff'

        stack_length = self.get_stack_length()
        meta.update(count=stack_length)

        stack_path = os.path.join(output_dir, stack_name)
        Logger.debug("Creating stack {} from {} bands...".format(
            stack_path, len(paths_resampled)))
        with rasterio.open(stack_path, 'w', **meta) as stack:
            stack.write(band1.read(1), 1)
            for band_id, band in enumerate(paths_resampled[1:], start=2):
                with rasterio.open(band) as b:
                    stack.write(b.read(1), band_id)
                # resampled single band not needed anymore
                os.remove(band)

    # delete also the first band
    os.remove(paths_resampled[0])
def get_last_response(self, identifier):
    """Get response from previous job.

    :param str identifier: IP identifier
    """
    data = super(QCProcessorSearchBase, self).get_last_response(
        identifier, full=True)

    try:
        qi = data['properties']['productInformation'][
            'qualityInformation']['qualityIndicators']
    except TypeError:
        Logger.debug("Broken previous job. Creating new response.")
        return None

    # search for feasibilityControlMetric
    idx = 0
    for item in qi:
        if item["isMeasurementOf"].endswith('feasibilityControlMetric'):
            break
        idx += 1

    # remove deliveryControlMetric, ...
    data['properties']['productInformation'][
        'qualityInformation']['qualityIndicators'] = qi[:idx+1]

    return data
def _delivery_control(self, filename, expected_filesize=None):
    """Perform delivery control.

    * check if the downloaded file exists
    * check the file size when an expected value is given

    Raise QCProcessorDownloadError on failure.

    :param str filename: filepath to check
    :param int expected_filesize: expected file size in bytes or None
    """
    # check if file exists
    if not os.path.exists(filename):
        raise QCProcessorDownloadError(
            "File {} doesn't exist".format(filename))

    if expected_filesize:
        # check expected filesize if given
        filesize = os.path.getsize(filename)
        if filesize != expected_filesize:
            raise QCProcessorDownloadError(
                "File {} size ({}) differs from expected value ({})".format(
                    filename, filesize, expected_filesize))
        Logger.debug("File {} expected filesize check passed".format(
            filename))

    # success (an exception is raised on failure)
    return True
def update_response(self, response_data):
    """Update current IP QI metadata response.

    :param dict response_data: key value pairs to update
    """
    if response_data.get('lineage') is None:
        # set default lineage if not defined
        response_data["lineage"] = self.get_lineage()

    Logger.debug("Updating response {} on idx {}".format(
        self.isMeasurementOf, self._current_response_idx))
    try:
        response = self._response[self._current_response_idx]
    except (IndexError, KeyError):
        raise ProcessorCriticalError(
            self,
            "Inconsistent response counter: index ({}) length ({})".format(
                self._current_response_idx, len(self._response)))

    response.update(response_data, self.isMeasurementOf)

    # check metadata-response consistency
    ## 1. value == False -> rejected
    if response.get_value(self.isMeasurementOf) is False and \
       self.get_response_status() not in (DbIpOperationStatus.rejected,
                                          DbIpOperationStatus.failed):
        Logger.warning(
            "Value response status inconsistency: "
            "setting status from {} to rejected".format(
                self.get_response_status()))
        # value is False -> switch to rejected status
        self.set_response_status(DbIpOperationStatus.rejected)

    ## 2. rejected | failed -> value = False
    if response.status in (DbIpOperationStatus.rejected,
                           DbIpOperationStatus.failed) and \
       response.get_value(self.isMeasurementOf) is not False:
        Logger.warning(
            "Value response status inconsistency: setting value to False")
        response.set_value(self.isMeasurementOf, False)
def _run(self, meta_data, data_dir, output_dir):
    """Perform processor tasks.

    :param meta_data: IP metadata
    :param str data_dir: path to data directory
    :param str output_dir: path to output processor directory

    :return dict: QI metadata
    """
    response_data = {}

    # reference image must be defined
    try:
        im_reference = self.config.abs_path(
            self.config['geometry']['reference_image'])
    except KeyError:
        Logger.error("Reference image not defined")
        self.set_response_status(DbIpOperationStatus.failed)
        return response_data
    if not os.path.exists(im_reference):
        Logger.error("Reference image '{}' not found".format(im_reference))
        self.set_response_status(DbIpOperationStatus.failed)
        return response_data

    # check if stack is already available
    output_dir = self._get_ip_output_path(meta_data['title'])
    if os.path.exists(output_dir):
        files = self.filter_files(output_dir, extension='.tif',
                                  pattern='stack_*')
        if len(files) > 0:
            if all(os.stat(f).st_size > 0 for f in files):
                Logger.debug('Stack ({}) already available, no operation '
                             'done'.format(output_dir))
                response_data.update(
                    self._run_stack_ordinary_control(
                        meta_data, data_dir, output_dir, response_data))
                return response_data
            Logger.debug('Stack ({}) already available, but has size 0 B. '
                         'A new one will be created.'.format(output_dir))

    # compute stack
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    try:
        lh_title = os.path.split(output_dir)[-1]
        stack_name = self.get_stack_name(lh_title)
        self.create_stack(data_dir, output_dir, stack_name)
        response_data.update(
            self._run_stack_ordinary_control(meta_data, data_dir,
                                             output_dir, response_data))
    except ProcessorRejectedError:
        self.set_response_status(DbIpOperationStatus.rejected)
    except ProcessorFailedError:
        self.set_response_status(DbIpOperationStatus.failed)

    return response_data
def get_query_params(self):
    """Get query parameters.

    :return dict: query parameters
    """
    kwargs = self._get_query_params()

    # date must be converted to string
    for item in ['start_date', 'end_date']:
        kwargs[item] = kwargs[item].strftime("%Y-%m-%d")

    kwargs['producttype'] = 'L1TP'  # used for testing

    Logger.debug("Query: {}".format(kwargs))

    return kwargs
def _is_updated(self, data, json_file):
    """Check if data are updated.

    :param dict data: data to be checked
    :param str json_file: filename with already stored data

    :return bool: True if updated otherwise False
    """
    updated = False
    json_data = JsonIO.read(json_file)
    is_l2a = json_data['title'].find('MSIL2A') > -1

    # check for updated items first
    for k, v in json_data.items():
        if k in data.keys() and data[k] != v:
            if isinstance(v, datetime.datetime):
                dt = datetime.datetime.strptime(data[k],
                                                '%Y-%m-%dT%H:%M:%S.%f')
                if (dt - v).total_seconds() < 0.01:
                    # timedelta under threshold
                    continue
            Logger.info("Change in file {} detected ({}: {} -> {})".format(
                os.path.basename(json_file), k, data[k], v))
            updated = True

    # check for added/deleted items
    if len(data.keys()) != len(json_data.keys()):
        for k in data.keys():
            if k not in json_data:
                Logger.info(
                    "Change in file {} detected ({} added)".format(
                        os.path.basename(json_file), k))
                updated = True
        for k in json_data.keys():
            if k == 'qcmms':
                # ignore QCMMS metadata if any
                continue
            if is_l2a and k in ('Tile Identifier horizontal order',
                                'Datatake sensing start',
                                'Instrument mode',
                                'Tile Identifier'):
                # ignore extra metadata items for L2A products
                continue
            if k not in data:
                Logger.info(
                    "Change in file {} detected ({} removed)".format(
                        os.path.basename(json_file), k))
                updated = True

    if not updated:
        Logger.debug("No changes in file {} detected".format(
            os.path.basename(json_file)))

    return updated
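# --- Illustrative sketch (not part of the processor code) ---
# A standalone example of the timestamp tolerance used in _is_updated()
# above: two timestamps closer than 0.01 s are treated as equal. The
# values below are hypothetical.
import datetime

stored = datetime.datetime(2020, 6, 1, 10, 0, 31, 24000)
incoming = '2020-06-01T10:00:31.026'
dt = datetime.datetime.strptime(incoming, '%Y-%m-%dT%H:%M:%S.%f')
print((dt - stored).total_seconds() < 0.01)  # True -> no change reported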
def cleanup_data(self):
    """Cleanup/remove downloaded data."""
    for d in ('metapath', 'downpath'):
        dirpath = os.path.join(self.config['project']['path'],
                               self.config['project'][d])
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
            Logger.debug("Project directory {} removed".format(dirpath))
def check_response(resp):
    """Check a catalog response stored in a JSON file.

    :param str resp: path to the response file
    """
    dataset = os.path.splitext(os.path.basename(resp))[0]
    Logger.info("Processing {}...".format(dataset))

    with open(resp) as fd:
        data = json.load(fd)
    parent_identifier = data['properties']['parentIdentifier']

    response = catalog.query(dataset, parent_identifier)
    if not args.quiet:
        print(json.dumps(response, indent=4, sort_keys=True),
              file=sys.stderr)

    datasets = catalog.get_datasets(parent_identifier)
    Logger.debug("Dataset in series: {}".format(dataset in datasets))
def append_to_stack(stack_file, geometry_shift_file_x, geometry_shift_file_y,
                    default_stack_length):
    """Append geometry shifts to the stack containing scene bands.

    :param str stack_file: path to the stack file with bands and cloud
        coverage
    :param str geometry_shift_file_x: path to the geometry shifts in X dir
    :param str geometry_shift_file_y: path to the geometry shifts in Y dir
    :param int default_stack_length: length of stack excluding cloud coverage
    """
    import rasterio
    import shutil

    with rasterio.open(stack_file) as stack:
        meta = stack.meta
        if meta['count'] > default_stack_length + 2:
            Logger.debug(
                'Not appending geometry shifts as new bands to stack '
                '{}. Stack already existed with the cloud coverage '
                'appended.'.format(stack_file))
            return 0

        Logger.debug('Appending geometry shifts as new bands to stack '
                     '{}'.format(stack_file))
        dtype = meta['dtype']
        stack_length = meta['count'] + 2
        meta.update(count=stack_length)

        # mkstemp() already returns an absolute path
        stack_tmp_path = tempfile.mkstemp(prefix='stack_tmp_with_shifts_',
                                          suffix='.tif')[1]
        with rasterio.open(stack_tmp_path, 'w', **meta) as stack_tmp:
            stack_tmp.write(stack.read(1), 1)
            for band_id in range(2, stack_length - 1):
                stack_tmp.write(stack.read(band_id), band_id)
            stack_tmp.write(
                rasterio.open(geometry_shift_file_x).read(1).astype(dtype),
                stack_length - 1)
            stack_tmp.write(
                rasterio.open(geometry_shift_file_y).read(1).astype(dtype),
                stack_length)

    # replace the original stack with the one with the geometry shifts
    # appended
    os.remove(stack_file)
    shutil.move(stack_tmp_path, stack_file)
def get_last_response(self, ip, full=False):
    """Get QI metadata response from previous job.

    :param str ip: image product
    :param bool full: True for full data otherwise only relevant part

    :return dict: QI metadata
    """
    try:
        job_id = Logger.db_handler().last_job_id(
            self.config['processors'][0])
    except KeyError:
        raise ProcessorCriticalError(self,
                                     "No processors defined in config")

    if not job_id:
        Logger.debug("First run? Unable to get last response from JSON file")
        return None

    json_file = os.path.join(self.config['logging']['directory'],
                             '{0:05d}'.format(job_id),
                             ip + '.json')
    if not os.path.exists(json_file):
        raise ProcessorCriticalError(
            self, "Response file {} not found".format(json_file))

    data = JsonIO.read(json_file, response=True)
    if full:
        return data

    relevant_part = QCResponse(data).get(self.isMeasurementOf)
    if not relevant_part:
        if self.config['strict']['enabled']:
            raise ProcessorCriticalError(
                self, "Unable to get relevant part for {} ({})".format(
                    self.isMeasurementOf, ip))
        else:
            return {}

    if hasattr(self, "isMeasurementOfSection"):
        # keep only the keys relevant for the section
        relevant_part = {
            key: relevant_part[key] for key in relevant_part
            if key in ("isMeasurementOf", "value", "lineage",
                       self.isMeasurementOfSection)
        }

    return relevant_part
def create_dir(self, directory):
    """Create a directory if it does not exist.

    Raise ConfigError on error.

    :param str directory: directory to be created
    """
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
            Logger.debug("Directory {} created".format(directory))
    except PermissionError as e:
        raise ConfigError(
            self._config_files,
            "Failed to create directory {}: {}".format(directory, e))
@classmethod
def read(cls, filename, response=False):
    """Read JSON file.

    :param str filename: JSON filename to parse
    :param bool response: True to parse JSON as response-valid

    :return dict: data content
    """
    Logger.debug("Reading {} (response: {})".format(filename, response))
    parser = None if response else cls.json_parser
    with open(filename) as fd:
        data = json.load(fd, object_hook=parser)

    return data
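# --- Illustrative sketch (not part of the processor code) ---
# A standalone example of the object_hook mechanism used in read() above:
# json.load passes every decoded JSON object through the hook, which lets
# a parser normalize values while reading. The hook below is a
# hypothetical stand-in for cls.json_parser.
import json

def json_parser(obj):
    # e.g. turn "null" strings into real None values
    return {k: (None if v == "null" else v) for k, v in obj.items()}

data = json.loads('{"title": "S2A", "cloudcover": "null"}',
                  object_hook=json_parser)
print(data)  # {'title': 'S2A', 'cloudcover': None}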
def __init__(self, config_files, cleanup=None, quiet=False):
    self._quiet = quiet

    # parse config file
    self._config_files = copy(config_files)
    if 'config.yaml' not in self._config_files:
        self._config_files.insert(0, 'config.yaml')
    self.config = QCConfigParser(self._config_files)

    # used by _add_db_handler()
    self._db_file = os.path.abspath(self.config['logging']['db'])
    Logger.debug("Using logging db: {}".format(self._db_file))

    # force cleanup if requested
    if cleanup is not None:
        self._cleanup(cleanup)
        return

    # list of processors to run (will be defined by self.set_processors())
    self._processors = []

    # set up db-based logger
    log_dir = os.path.dirname(self._db_file)
    # create db logging directory if it does not exist
    self.config.create_dir(log_dir)
    # add DB handler
    self._add_db_handler()

    # create logging directory if it does not exist
    self.config.create_dir(self.config['logging']['directory'])
    # set up file-based logger
    Logger.add_handler(logging.FileHandler(self._get_log_file()))

    self.response = []

    # response composer
    self._response_composer = QCResponseComposer(
        self.config['logging']['directory'])

    Logger.info("QCManager started")
def calculate_tc_std(self, stack_ips, bands, output_dir):
    """Calculate the TC STD indexes based on Qui et al. 2019.

    :param list stack_ips: stacked IPs for TSI calculation
    :param list bands: bands to process
    :param str output_dir: output directory

    :return dict: TC STD index per band
    """
    dates_img = self.get_dates_images(stack_ips, output_dir)
    dates_srt = list(dates_img.keys())
    ordinal_dates = self.get_ordinal_dates(dates_srt)

    # sort mix of Sentinel-2 and Landsat-8 based on acquisition date
    ips_srt = []
    for d in dates_img.keys():
        ips_srt.append(dates_img[d])

    # Qui et al. 2019 params
    low = 0
    high = 10000

    img_0 = self.read_image_array(dates_img[dates_srt[0]]['stack'])
    img_arr = np.zeros(
        (len(dates_srt), img_0.shape[0], img_0.shape[1], img_0.shape[2]))

    for i in range(len(dates_srt)):
        img = self.read_image_array(ips_srt[i]['stack'])
        img_cc = self.read_image_array(ips_srt[i]['cc'])
        Logger.debug("TC_STD: {}".format(
            os.path.basename(ips_srt[i]['stack'])))
        for band in bands:
            valid = self.valid_data_mask(img[band, :, :], low, high)
            mask = ((img_cc == 1) & valid) * 1
            # np.float is deprecated, use the built-in float
            img_mask = mask.astype(float)
            img_mask[img_mask == 0] = np.nan
            img_arr[i, band, :, :] = img[band, :, :] * img_mask

    tc_std = np.zeros(img_0.shape)
    std_result = {}
    for i in range(img_arr.shape[1]):
        tc_std[i, :, :] = np.nanstd(img_arr[:, i, :, :], axis=0)
        std_result['B{:0>2}'.format(i + 1)] = np.nanmean(tc_std[i, :, :])
    img_arr = None

    return std_result
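# --- Illustrative sketch (not part of the processor code) ---
# The per-band statistic in calculate_tc_std() above is a per-pixel
# standard deviation over time (axis 0), with cloud-masked pixels set
# to NaN and therefore ignored by the nan-aware functions. The array
# values below are hypothetical.
import numpy as np

# (time, rows, cols): three acquisitions of a 2x2 tile for one band
img_arr = np.array([[[100., np.nan], [120., 110.]],
                    [[102., 95.], [118., np.nan]],
                    [[101., 97.], [np.nan, 112.]]])
tc_std = np.nanstd(img_arr, axis=0)  # per-pixel std over time
print(np.nanmean(tc_std))            # scene-level TC STD for the band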
def _store_to_project_path(self, config_files):
    """Store config to project path.

    :param list config_files: list of config files
    """
    path = self._cfg['project']['path']
    if not os.path.exists(path):
        # create path if it does not exist
        os.makedirs(path)

    cfile = os.path.join(path, 'config.yaml')
    with open(cfile, 'w') as fd:
        for config_file in config_files:
            with open(config_file) as fd_c:
                fd.write('### {}{}'.format(self.abs_path(config_file),
                                           os.linesep))
                fd.write(fd_c.read())

    Logger.debug('Configuration stored to {}'.format(cfile))
@classmethod
def write(cls, filename, data, delimiter=';', append=False):
    """Write CSV data into file.

    :param str filename: filename where to write data
    :param list data: list of dictionaries to be saved
    :param str delimiter: delimiter to be used
    :param bool append: True to append, otherwise overwrite
    """
    mode = 'a' if append else 'w'
    Logger.debug("File {} open with {} mode".format(filename, mode))
    with open(filename, mode) as fd:
        fd.write(delimiter.join(data[0].keys()))
        fd.write(os.linesep)
        for item in data:
            fd.write(delimiter.join(map(cls.csv_formatter, item.values())))
            fd.write(os.linesep)
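# --- Illustrative sketch (not part of the processor code) ---
# The expected input for write() above is a list of dicts sharing a
# common key set; the first item provides the header. The equivalent
# core logic, with hypothetical data and str instead of cls.csv_formatter:
data = [
    {'title': 'S2A_MSIL1C_20200601', 'cloudcover': 2.5},
    {'title': 'S2B_MSIL1C_20200606', 'cloudcover': 10.0},
]
print(';'.join(data[0].keys()))               # title;cloudcover
for item in data:
    print(';'.join(map(str, item.values())))  # S2A_MSIL1C_20200601;2.5 ...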
def run_fmask(self, data_dir, output, resolution=20):
    """Run fmask on a Sentinel-2 SAFE directory.

    :param str data_dir: path to the SAFE directory
    :param str output: output mask filename
    :param int resolution: output pixel size in metres
    """
    # pixsize shall be the target spatial resolution from LPST, or the
    # default --pixsize 20 followed by resampling to the target
    # resolution (=> faster); -e TEMPDIR allows faster SSD processing
    Logger.info("Running fmask for {}".format(data_dir))
    tmppath = tempfile.mkdtemp()
    subprocess.run([
        'fmask_sentinel2Stacked.py',
        '--pixsize', str(resolution),
        '-o', output,
        '-e', tmppath,
        '--safedir', data_dir
    ], check=True)
    shutil.rmtree(tmppath)
    Logger.debug("fmask temp directory removed: {}".format(tmppath))
    if output:
        Logger.info("Output from fmask: {}".format(output))
def append_to_stack(stack_file, radiometry_file, default_stack_length):
    """Append a radiometry control raster to a stack.

    :param str stack_file: target stack file
    :param str radiometry_file: radiometry control raster file
    :param int default_stack_length: number of bands
    """
    import rasterio
    import shutil

    with rasterio.open(stack_file) as stack:
        meta = stack.meta
        if meta['count'] > default_stack_length + 3:
            Logger.debug(
                'Not appending radiometry file {} as a new band to stack '
                '{}. Stack already existed with the radiometry '
                'appended.'.format(radiometry_file, stack_file))
            return 0

        Logger.debug('Appending radiometry file {} as a new band to stack '
                     '{}'.format(radiometry_file, stack_file))
        dtype = meta['dtype']
        stack_length = meta['count'] + 1
        meta.update(count=stack_length)

        # mkstemp() already returns an absolute path
        stack_tmp_path = tempfile.mkstemp(prefix='stack_tmp_radiometry_',
                                          suffix='.tif')[1]
        with rasterio.open(stack_tmp_path, 'w', **meta) as stack_tmp:
            stack_tmp.write(stack.read(1), 1)
            for band_id in range(2, stack_length):
                stack_tmp.write(stack.read(band_id), band_id)
            stack_tmp.write(
                rasterio.open(radiometry_file).read(1).astype(dtype),
                stack_length)

    # replace the original stack with the one with radiometry control
    # appended
    os.remove(stack_file)
    shutil.move(stack_tmp_path, stack_file)
def get_query_params(self):
    """Get query.

    :return dict: query parameters
    """
    kwargs = self._get_query_params()

    if 'cloudcoverpercentage' in kwargs:
        kwargs['cloudcoverpercentage'] = (0, kwargs['cloudcoverpercentage'])

    if 'datefrom' in kwargs and 'dateto' in kwargs:
        kwargs['date'] = (kwargs['datefrom'], kwargs['dateto'])
        del kwargs['datefrom']
        del kwargs['dateto']

    Logger.debug("Query: {}".format(kwargs))
    Logger.info("config - processed")

    return kwargs
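# --- Illustrative sketch (not part of the processor code) ---
# The parameter rewrite in get_query_params() above turns scalar limits
# into the range tuples expected by the sentinelsat query API; the input
# values are hypothetical.
kwargs = {'cloudcoverpercentage': 30,
          'datefrom': '20200601', 'dateto': '20200630'}
kwargs['cloudcoverpercentage'] = (0, kwargs['cloudcoverpercentage'])
kwargs['date'] = (kwargs.pop('datefrom'), kwargs.pop('dateto'))
print(kwargs)
# {'cloudcoverpercentage': (0, 30), 'date': ('20200601', '20200630')}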
def run(self):
    """Run all registered processors from the queue."""
    # check if processors are defined
    if not self._processors:
        raise ConfigError(self._config_files,
                          "list of processors not defined")

    # determine current/previous job id
    job_id = Logger.db_handler().job_id()
    prev_job_id = Logger.db_handler().last_job_id(
        self.config['processors'][0])
    Logger.info("Job started (id {})".format(job_id))
    Logger.db_handler().job_started()
    if prev_job_id:
        Logger.debug("Previous job found (id {})".format(prev_job_id))
    else:
        Logger.debug("No previous job found. Starting from scratch")

    start = time.time()
    for proc in self._processors:
        try:
            # run the processor
            proc.run()
            try:
                Logger.info('{} processor result: {}'.format(
                    proc.identifier, proc.result()))
            except ProcessorResultError:
                pass
            # store JSON response after each processor
            self.save_response(proc)
        except ProcessorFailedError:
            pass

    Logger.success(
        "Job {} successfully finished in {:.6f} sec".format(
            job_id, time.time() - start))
def check_bands_read(self, img_files):
    """Check if raster bands can be read.

    Raise ProcessorRejectedError on failure.

    :param list img_files: list of image files
    """
    from osgeo import gdal

    for imfile in img_files:
        ds = gdal.Open(imfile)
        if ds is None:
            raise ProcessorRejectedError(
                self, "{} is not readable (invalid GDAL datasource)".format(
                    imfile))
        ds = None
        Logger.debug(
            "File {} is readable (valid GDAL datasource)".format(imfile))
def is_valid(self, response_file):
    """Validate response QI metadata file.

    :param str response_file: response file to be validated

    :return bool: True if valid, False otherwise
    """
    data = self.load(response_file)
    if data is None:
        return False

    try:
        validate(data, schema=self._schema)
        Logger.debug("JSON response {} is valid".format(response_file))
    except ValidationError as e:
        Logger.error("File {} validation against schema failed: {}".format(
            response_file, e))
        return False

    return True
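# --- Illustrative sketch (not part of the processor code) ---
# A minimal jsonschema example mirroring the validate() call in
# is_valid() above; the schema and document are hypothetical.
from jsonschema import validate, ValidationError

schema = {'type': 'object',
          'properties': {'type': {'const': 'Feature'}},
          'required': ['type']}
try:
    validate({'type': 'Feature'}, schema=schema)
    print("valid")
except ValidationError as e:
    print("invalid: {}".format(e))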
def _download_file(self, uuid, identifier, output_dir):
    """Download IP.

    :param str uuid: UUID
    :param str identifier: IP identifier (title)
    :param str output_dir: output directory
    """
    output_file = os.path.join(output_dir, identifier + self.extension)
    if os.path.exists(output_file):
        Logger.debug("Data already downloaded. Skipping")
        return

    Logger.info("Downloading {} -> {}".format(uuid, identifier))
    expected_filesize = self.connector.download_file(uuid, output_dir)

    # control downloaded file
    if self._delivery_control(output_file, expected_filesize):
        Logger.debug("File {} passed delivery control".format(output_file))
def _read_config_file(self, config_file):
    """Read configuration from a single file.

    :param str config_file: path to config file
    """
    if not os.path.isabs(config_file):
        config_file = os.path.normpath(
            os.path.join(os.path.dirname(__file__), '..', config_file))
    self._config_files.append(config_file)

    # read configuration into a dictionary
    # see https://martin-thoma.com/configuration-files-in-python/
    try:
        with open(config_file, 'r') as ymlfile:
            try:
                cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)
            except AttributeError:
                # support also older versions of pyyaml
                cfg = yaml.load(ymlfile)

        if 'logging' in cfg:
            # set logging level
            try:
                Logger.setLevel(cfg['logging']['level'])
            except KeyError:
                pass  # keep default log level

        # merge into the already collected configuration:
        # list values replace the stored value, dicts are merged key by key
        for key in cfg.keys():
            if key in self._cfg:
                if isinstance(cfg[key], list):
                    self._cfg[key] = cfg[key]
                else:
                    # assuming dict
                    for k, v in cfg[key].items():
                        self._cfg[key][k] = v
            else:
                self._cfg[key] = copy(cfg[key])

        Logger.debug("Config file '{}' processed".format(config_file))
    except Exception as e:
        raise ConfigError(config_file, e)
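# --- Illustrative sketch (not part of the processor code) ---
# The merge semantics of _read_config_file() above, shown standalone:
# list values replace the stored value wholesale, dict values are merged
# key by key. The config fragments below are hypothetical.
cfg_stored = {'processors': ['download'],
              'project': {'path': '/data', 'downpath': 'down'}}
cfg_new = {'processors': ['download', 'ordinary_control'],
           'project': {'downpath': 'dl'}}
for key in cfg_new:
    if key in cfg_stored:
        if isinstance(cfg_new[key], list):
            cfg_stored[key] = cfg_new[key]
        else:  # assuming dict
            for k, v in cfg_new[key].items():
                cfg_stored[key][k] = v
    else:
        cfg_stored[key] = cfg_new[key]
print(cfg_stored)
# {'processors': ['download', 'ordinary_control'],
#  'project': {'path': '/data', 'downpath': 'dl'}}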