def filter_imported_files(self):
    """
    Filter out all files already imported according to coverage_id.resume.json
    """
    resumer = Resumer(self.coverage_id)
    not_imported_files = resumer.get_not_imported_files(self.files)
    self.files = not_imported_files
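# --- Illustration: the Resumer contract assumed by filter_imported_files ---
# A minimal sketch, not the real wcst_import implementation: a Resumer keyed
# on a coverage id reads the already-imported file paths from
# <coverage_id>.resume.json and filters them out of a candidate list. The
# class name ResumerSketch and the flat JSON-array layout are assumptions.
import json
import os


class ResumerSketch(object):
    def __init__(self, coverage_id):
        self.imported = set()
        resume_file = coverage_id + ".resume.json"
        if os.path.exists(resume_file):
            with open(resume_file) as f:
                # assumed layout: a JSON array of already-imported file paths
                self.imported = set(json.load(f))

    def get_not_imported_files(self, files):
        # keep only the files with no entry in the resume file
        return [f for f in files if f not in self.imported]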
def __init__(self, session):
    """
    The recipe class for regular timeseries. To get an overview of the ingredients
    needed for this recipe check ingredients/time_series_regular
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
    validator = GDALValidator(self.session.files)
    if ConfigManager.skip:
        self.session.files = validator.get_valid_files()
def __init__(self, session):
    """
    The recipe class for map_mosaic. To get an overview of the ingredients
    needed for this recipe check ingredients/map_mosaic
    :param Session session: the session for this import
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
    validator = GDALValidator(self.session.files)
    if ConfigManager.skip:
        self.session.files = validator.get_valid_files()
def __init__(self, session):
    """
    The recipe class for map_mosaic. To get an overview of the ingredients
    needed for this recipe check ingredients/map_mosaic
    :param Session session: the session for this import
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
    self.recipe_type = GdalToCoverageConverter.RECIPE_TYPE
    if "coverage" in self.options:
        self.options['coverage']['slicer'] = {}
        self.options['coverage']['slicer']['type'] = GdalToCoverageConverter.RECIPE_TYPE
def __init__(self, session):
    """
    :param Session session: the session for this import
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
def __init__(self, session):
    """
    The recipe class for my_custom_recipe (check the wcst_import guide on the
    rasdaman web page for more details).
    :param Session session: the session for the import run
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options']
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
def __init__(self, coverage, insert_into_wms=False, scale_levels=None, grid_coverage=False):
    """
    Imports a coverage into wcst
    :param Coverage coverage: the coverage to be imported
    """
    self.coverage = coverage
    self.resumer = Resumer(coverage.coverage_id)
    self.coverage.slices = SliceRestricter(
        self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
    self.processed = 0
    self.total = len(coverage.slices)
    self.insert_into_wms = insert_into_wms
    self.scale_levels = scale_levels
    self.grid_coverage = grid_coverage
def _get_convertors(self):
    """
    Returns a map of coverage id -> GdalToCoverageConverter
    """
    convertors = {}
    band_data_type = self.DEFAULT_BAND_DATA_TYPE
    if self.product == self.SLC_PRODUCT:
        band_data_type = self.SLC_BAND_DATA_TYPE
    for file in self.session.get_files():
        # Check that this file still exists when preparing to import
        if not FileUtil.validate_file_path(file.get_filepath()):
            continue
        # Determine which coverage id this file belongs to
        modebeam, polarisation = self._get_modebeam_polarisation(file.filepath)
        cov_id = self._get_coverage_id(self.coverage_id, modebeam, polarisation)
        # Skip files already recorded in coverage_id.resume.json
        self.resumer = Resumer(cov_id)
        if self.resumer.is_file_imported(file.filepath):
            continue
        conv = self._get_convertor(convertors, cov_id)
        file_pair = FilePair(file.filepath, file.filepath)
        conv.files = [file_pair]
        crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)
        # Each file carries a different datetime in its name
        evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
            GdalToCoverageConverter.RECIPE_TYPE, file)
        conv.data_type = band_data_type
        slices = conv._create_coverage_slices(crs_axes, evaluator_slice)
        conv.coverage_slices += slices
    return convertors
def __init__(self, session):
    """
    The recipe class for wcs. To get an overview of the ingredients needed for
    this recipe check ingredients/map_mosaic
    :param Session session: the session for this import
    """
    super(Recipe, self).__init__(session)
    self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
    self.importer = None
    self.resumer = Resumer(self.session.get_coverage_id())
    self.coverage = None
def __init__(self, coverage, insert_into_wms=False):
    """
    Imports a coverage into wcst
    :param Coverage coverage: the coverage to be imported
    """
    self.coverage = coverage
    self.resumer = Resumer(coverage.coverage_id)
    self.coverage.slices = SliceRestricter(
        self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
    self.processed = 0
    self.total = len(coverage.slices)
    self.insert_into_wms = insert_into_wms
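# --- Illustration: driving the Importer ---
# A hypothetical driver, assuming a Coverage has already been assembled by a
# recipe's _get_coverage(): construction filters already-imported slices via
# the Resumer, ingest() inserts the rest, get_progress() reports the counts.
def run_import(coverage):
    importer = Importer(coverage, insert_into_wms=True)
    importer.ingest()
    processed, total = importer.get_progress()
    print("Imported %s/%s slices." % (processed, total))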
def __init__(self, session):
    """
    :param Session session: the session for this import
    """
    super(Recipe, self).__init__(session)
    self.session = session
    self.options = session.get_recipe()['options']
    self.validate()
    self.coverage_id = self.session.get_input()["coverage_id"]
    self.resumer = Resumer(self.coverage_id)
    self.source_coverage_ids = self.parse_source_coverage_ids(
        self.session.get_input()["source_coverage_ids"])
    self.envelope = self.options["envelope"]
    self.srs_name = XMLUtil.escape(self.envelope["srsName"])
    # array of axes
    self.axes = self.envelope["axis"]
    self.import_wms = self.options["wms_import"] if "wms_import" in self.options else None
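# --- Illustration: the ingredients fragment this __init__ expects ---
# Shown as Python dicts for orientation; the key names follow the reads above
# ("coverage_id", "source_coverage_ids", "envelope"/"srsName"/"axis",
# "wms_import"), but the concrete values and the per-axis keys are invented.
example_input = {
    "coverage_id": "MergedCoverage",
    "source_coverage_ids": "CovA,CovB",
}
example_options = {
    "envelope": {
        "srsName": "http://localhost:8080/def/crs/EPSG/0/4326",
        # the per-axis keys below are an assumption for this sketch
        "axis": [
            {"name": "Lat", "min": -90, "max": 90},
            {"name": "Long", "min": -180, "max": 180},
        ],
    },
    "wms_import": True,
}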
def _get_convertors(self):
    """
    Returns a map of coverage id -> GdalToCoverageConverter
    """
    convertors = {}
    for f in self.session.get_files():
        # The original file does not contain any information for geo bounds;
        # that comes from its subdatasets
        if not FileUtil.validate_file_path(f.get_filepath()):
            continue
        gdal_ds = GDALGmlUtil(f.get_filepath())
        subdatasets = self._get_subdatasets(gdal_ds, f)
        gdal_ds.close()
        level = self._get_level(f.get_filepath())
        if len(self.levels) > 0 and level not in self.levels:
            # skip the file, as it's not in the list of levels provided in the ingredients file
            log.debug("Skipping " + level + " data")
            continue
        crs_code = ""
        evaluator_slice = None
        for res in self.resolutions:
            subds_file = self._get_subdataset_file(subdatasets, res)
            crs_code = self._get_crs_code(subds_file.get_filepath(), crs_code)
            if len(self.crss) > 0 and crs_code not in self.crss:
                # skip the CRS, it's not in the list of CRSs provided in the ingredients file
                log.debug("Skipping data with CRS " + crs_code)
                continue
            cov_id = self._get_coverage_id(self.coverage_id, crs_code, level, res)
            # Skip files already recorded in coverage_id.resume.json
            self.resumer = Resumer(cov_id)
            if self.resumer.is_file_imported(f.filepath):
                continue
            conv = self._get_convertor(convertors, cov_id, crs_code, level, res)
            file_pair = FilePair(subds_file.filepath, f.filepath)
            conv.files = [file_pair]
            crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)
            if evaluator_slice is None:
                # The subdataset contains the information for geo bounds
                evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                    GdalToCoverageConverter.RECIPE_TYPE, subds_file)
            # Resolutions 10m, 20m and 60m have the same data type (UInt16),
            # while TCI has data type Byte
            if res == self.RES_TCI:
                conv.data_type = "Byte"
            else:
                conv.data_type = "UInt16"
            # Fixed resolutions for the 3 axes of a Sentinel 2 coverage
            axis_resolutions = self.RES_DICT[res]
            slices = conv._create_coverage_slices(crs_axes, evaluator_slice, axis_resolutions)
            conv.coverage_slices += slices
    return convertors
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
        The recipe class for irregular timeseries. To get an overview of the
        ingredients needed for this recipe check ingredients/time_series_irregular
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())
        self.recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        if "coverage" in self.options:
            self.options['coverage']['slicer'] = {}
            self.options['coverage']['slicer']['type'] = GdalToCoverageConverter.RECIPE_TYPE

    def validate(self):
        super(Recipe, self).validate()
        if "time_crs" not in self.options or self.options['time_crs'] == "":
            raise RecipeValidationException("No valid time crs provided")
        if 'time_parameter' not in self.options:
            raise RecipeValidationException("No valid time parameter provided")
        if 'metadata_tag' not in self.options['time_parameter'] and \
                'filename' not in self.options['time_parameter']:
            raise RecipeValidationException(
                "You have to provide either a metadata_tag or a filename pattern "
                "for the time parameter")
        if 'datetime_format' not in self.options['time_parameter']:
            raise RecipeValidationException("No valid datetime_format provided")
        if 'metadata_tag' in self.options['time_parameter'] and \
                "tag_name" not in self.options['time_parameter']['metadata_tag']:
            raise RecipeValidationException(
                "No metadata tag to extract time from gdal was provided")
        if 'filename' in self.options['time_parameter'] \
                and self.options['time_parameter']['filename']['regex'] == "" \
                and self.options['time_parameter']['filename']['group'] == "":
            raise RecipeValidationException(
                "No filename regex and group to extract time from gdal was provided")
        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()
        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info("All files have been analyzed. Please verify that the axis subsets "
                 "of the first {} files above are correct.".format(number_of_files))
        index = 1
        for slice in slices:
            log.info("Slice " + str(index) + ": " + str(slice))
            index += 1

    def ingest(self):
        """
        Ingests the input files
        """
        importer = self._get_importer()
        importer.ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype (int, int)
        """
        return self._get_importer().get_progress()

    def _generate_timeseries_tuples(self, limit=None):
        """
        Generates the timeseries tuples from the original files based on the recipe,
        and sorts the files in order of time.
        :rtype: list[TimeFileTuple]
        """
        ret = []
        if limit is None:
            limit = len(self.session.get_files())
        time_format = None
        if 'datetime_format' in self.options['time_parameter']:
            time_format = self.options['time_parameter']['datetime_format']
        if 'metadata_tag' in self.options['time_parameter']:
            mtag = self.options['time_parameter']['metadata_tag']['tag_name']
            for tfile in self.session.get_files():
                if len(ret) == limit:
                    break
                valid_file = True
                try:
                    gdal_file = GDALGmlUtil(tfile.get_filepath())
                except Exception as ex:
                    FileUtil.ignore_coverage_slice_from_file_if_possible(
                        tfile.get_filepath(), ex)
                    valid_file = False
                if valid_file:
                    dtutil = DateTimeUtil(gdal_file.get_datetime(mtag),
                                          time_format, self.options['time_crs'])
                    ret.append(TimeFileTuple(dtutil, tfile))
        elif 'filename' in self.options['time_parameter'] and len(ret) < limit:
            regex = self.options['time_parameter']['filename']['regex']
            group = int(self.options['time_parameter']['filename']['group'])
            for tfile in self.session.get_files():
                if len(ret) == limit:
                    break
                dtutil = DateTimeUtil(re.search(regex, tfile.filepath).group(group),
                                      time_format, self.options['time_crs'])
                ret.append(TimeFileTuple(dtutil, tfile))
        else:
            raise RecipeValidationException(
                "No method to get the time parameter, you should either choose "
                "metadata_tag or filename.")
        # Sort the coverage slices by datetime before importing
        # (ascending by default, descending if import_order says so)
        if self.options["import_order"] == AbstractToCoverageConverter.IMPORT_ORDER_DESCENDING:
            return sorted(ret, reverse=True)
        return sorted(ret)

    def _get_coverage_slices(self, crs, gdal_coverage_converter):
        """
        Returns the slices for the collection of files given
        """
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)
        slices = []
        timeseries = self._generate_timeseries_tuples()
        count = 1
        for tpair in timeseries:
            file_path = tpair.file.get_filepath()
            # NOTE: skip any file already recorded in *.resume.json,
            # re-processing it is just wasted time
            if not self.resumer.is_file_imported(file_path):
                timer = Timer()
                # print which file is being analyzed
                FileUtil.print_feedback(count, len(timeseries), file_path)
                if not FileUtil.validate_file_path(file_path):
                    continue
                valid_coverage_slice = True
                try:
                    subsets = GdalAxisFiller(crs_axes, GDALGmlUtil(file_path)).fill(True)
                    subsets = self._fill_time_axis(tpair, subsets)
                except Exception as ex:
                    # If skip: true, ignore this file; otherwise raise the exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(file_path, ex)
                    valid_coverage_slice = False
                if valid_coverage_slice:
                    # Generate the local metadata string for the current coverage slice
                    self.evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                        self.recipe_type, tpair.file)
                    local_metadata = gdal_coverage_converter._generate_local_metadata(
                        subsets, self.evaluator_slice)
                    slices.append(Slice(subsets, FileDataProvider(tpair.file),
                                        local_metadata))
                timer.print_elapsed_time()
                count += 1
        return slices

    def _fill_time_axis(self, tpair, subsets):
        """
        Fills the time axis parameters
        :param TimeFileTuple tpair: the input pair
        :param list[AxisSubset] subsets: the axis subsets for the tpair
        """
        for i in range(0, len(subsets)):
            if subsets[i].coverage_axis.axis.crs_axis is not None \
                    and subsets[i].coverage_axis.axis.crs_axis.is_time_axis():
                subsets[i].coverage_axis.axis = IrregularAxis(
                    subsets[i].coverage_axis.axis.label,
                    subsets[i].coverage_axis.axis.uomLabel,
                    tpair.time.to_string(), tpair.time.to_string(),
                    tpair.time.to_string(), [0],
                    subsets[i].coverage_axis.axis.crs_axis)
                subsets[i].coverage_axis.grid_axis.resolution = 1
                subsets[i].interval.low = tpair.time
        return subsets

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(self.session.get_files())
        crs = CRSUtil.get_compound_crs([self.options['time_crs'], gdal_dataset.get_crs()])
        general_recipe = GeneralRecipe(self.session)
        global_metadata_fields = general_recipe._global_metadata_fields()
        local_metadata_fields = general_recipe._local_metadata_fields()
        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())
        gdal_coverage_converter = GdalToCoverageConverter(
            self.resumer, self.session.get_default_null_values(), self.recipe_type,
            sentence_evaluator, self.session.get_coverage_id(), None,
            self.session.get_files(), crs, None, None,
            global_metadata_fields, local_metadata_fields, None, None,
            general_recipe._metadata_type(), None, None)
        coverage_slices = self._get_coverage_slices(crs, gdal_coverage_converter)
        fields = GdalRangeFieldsGenerator(gdal_dataset,
                                          self.options['band_names']).get_range_fields()
        global_metadata = None
        if len(coverage_slices) > 0:
            global_metadata = gdal_coverage_converter._generate_global_metadata(
                coverage_slices[0], self.evaluator_slice)
        coverage = Coverage(self.session.get_coverage_id(), coverage_slices, fields, crs,
                            gdal_dataset.get_band_gdal_type(), self.options['tiling'],
                            global_metadata)
        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(),
                                     self.options['wms_import'],
                                     self.options['scale_levels'])
        return self.importer

    @staticmethod
    def get_name():
        return "time_series_irregular"
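# --- Illustration: time_parameter shapes accepted by validate() above ---
# Two admissible option fragments for the irregular time axis; the concrete
# values (CRS URL, tag name, regex) are invented for this sketch.
options_with_metadata_tag = {
    "time_crs": "http://localhost:8080/def/crs/OGC/0/AnsiDate",
    "time_parameter": {
        "datetime_format": "YYYY-MM-DDTHH:mm:ss",
        "metadata_tag": {"tag_name": "TIFFTAG_DATETIME"},
    },
}
options_with_filename_pattern = {
    "time_crs": "http://localhost:8080/def/crs/OGC/0/AnsiDate",
    "time_parameter": {
        "datetime_format": "YYYYMMDD",
        # group 2 of the regex captures the 8-digit date in the file name
        "filename": {"regex": "(.*)_([0-9]{8})", "group": "2"},
    },
}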
class Importer:
    def __init__(self, coverage, insert_into_wms=False):
        """
        Imports a coverage into wcst
        :param Coverage coverage: the coverage to be imported
        """
        self.coverage = coverage
        self.resumer = Resumer(coverage.coverage_id)
        self.coverage.slices = SliceRestricter(
            self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
        self.processed = 0
        self.total = len(coverage.slices)
        self.insert_into_wms = insert_into_wms

    def ingest(self):
        """
        Ingests the given coverage
        """
        if len(self.coverage.slices) > 0:
            if self._is_insert():
                self._initialize_coverage()
            # Insert the remaining slices
            self._insert_slices()
        if self.insert_into_wms:
            self._insert_into_wms()

    def get_progress(self):
        """
        Returns the progress of the import
        :rtype: tuple
        """
        if self.total == 0:
            log.warn("No slices to import.")
            return -1, -1
        return self.processed, self.total

    def _insert_slice(self, current):
        """
        Inserts one slice
        :param Slice current: the slice to be imported
        """
        current_exception = None
        current_str = ""
        for attempt in range(0, ConfigManager.retries):
            try:
                current_str = str(current)
                file = self._generate_gml_slice(current)
                subsets = self._get_update_subsets_for_slice(current)
                request = WCSTUpdateRequest(self.coverage.coverage_id, file.get_url(),
                                            subsets, ConfigManager.insitu)
                executor = ConfigManager.executor
                executor.execute(request)
                file.release()
                self.resumer.add_imported_data(current.data_provider)
            except Exception as e:
                log.warn("\nException thrown when trying to insert slice: \n" +
                         current_str + "Retrying, you can safely ignore the warning "
                         "for now. Tried " + str(attempt + 1) + " times.\n")
                current_exception = e
                sleep(ConfigManager.retry_sleep)
            else:
                break
        else:
            log.warn("\nFailed to insert slice. Attempted " +
                     str(ConfigManager.retries) + " times.")
            raise current_exception

    def get_slices_for_description(self):
        """
        Returns a list with the first slices to be used in the import description
        :rtype: list[Slice]
        """
        slices = []
        max_slices = ConfigManager.description_max_no_slices \
            if ConfigManager.description_max_no_slices < len(self.coverage.slices) \
            else len(self.coverage.slices)
        for i in range(0, max_slices):
            slices.append(self.coverage.slices[i])
        return slices

    def _insert_slices(self):
        """
        Insert the slices of the coverage
        """
        for i in range(self.processed, self.total):
            try:
                self._insert_slice(self.coverage.slices[i])
            except Exception as e:
                if ConfigManager.skip:
                    log.warn("Skipped slice " + str(self.coverage.slices[i]))
                else:
                    raise e
            self.processed += 1

    def _initialize_coverage(self):
        """
        Initializes the coverage
        """
        file = self._generate_initial_gml_slice()
        request = WCSTInsertRequest(file.get_url(), False,
                                    self.coverage.pixel_data_type, self.coverage.tiling)
        executor = ConfigManager.executor
        current_insitu_value = executor.insitu
        executor.insitu = None
        executor.execute(request)
        executor.insitu = current_insitu_value
        file.release()

    def _get_update_subsets_for_slice(self, slice):
        """
        Returns the given slice's interval as a list of wcst subsets
        :param slice: the slice for which to generate this
        :rtype: list[WCSTSubset]
        """
        subsets = []
        for axis_subset in slice.axis_subsets:
            low = axis_subset.interval.low
            high = axis_subset.interval.high
            if ConfigManager.subset_correction and high is not None \
                    and low != high and type(low) != str:
                low += float(axis_subset.coverage_axis.grid_axis.resolution) / 2
                if high is not None:
                    high -= float(axis_subset.coverage_axis.grid_axis.resolution) / 2
            subsets.append(WCSTSubset(axis_subset.coverage_axis.axis.label, low, high))
        return subsets

    def _generate_gml_slice(self, slice):
        """
        Generates the gml for a regular slice
        :param slice: the slice for which the gml should be created
        :rtype: File
        """
        metadata_provider = MetadataProvider(self.coverage.coverage_id,
                                             self._get_update_axes(slice),
                                             self.coverage.range_fields,
                                             self.coverage.crs, None)
        data_provider = slice.data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _get_update_axes(self, slice):
        """
        Returns the axes for the slices that are bound to the data
        (e.g. Lat and Long for a 2-D raster)
        :param slice: the slice for which the gml should be created
        :rtype: dict[Axis, GridAxis]
        """
        axes = OrderedDict()
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                axes[axis_subset.coverage_axis.axis] = axis_subset.coverage_axis.grid_axis
        return axes

    def _generate_initial_gml_slice(self):
        """
        Returns the initial slice in gml format
        :rtype: File
        """
        return self._generate_initial_gml_db()

    def _generate_initial_gml_db(self):
        """
        Generates the initial slice in gml for importing using the database method
        and returns the gml for it
        :rtype: File
        """
        # Transform the axes domains such that only a point is defined.
        # For the first slice we need to import a single point, which will
        # then be updated with the real data
        axes_map = OrderedDict()
        for axis, grid_axis in self.coverage.get_insert_axes().iteritems():
            if axis.coefficient is not None:
                # Take the first coefficient of the irregular axis to create an initial slice
                axis.coefficient = [axis.coefficient[0]]
            axes_map[axis] = GridAxis(grid_axis.order, grid_axis.label,
                                      grid_axis.resolution, 0, 0)
        metadata_provider = MetadataProvider(self.coverage.coverage_id, axes_map,
                                             self.coverage.range_fields,
                                             self.coverage.crs, self.coverage.metadata)
        tuple_list = ",".join(['0'] * len(self.coverage.range_fields))
        data_provider = TupleListDataProvider(tuple_list)
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _generate_initial_gml_insitu(self):
        """
        Generates the initial slice in gml for importing using the insitu method
        and returns the gml file for it
        :rtype: File
        """
        metadata_provider = MetadataProvider(self.coverage.coverage_id,
                                             self.coverage.get_insert_axes(),
                                             self.coverage.range_fields,
                                             self.coverage.crs, self.coverage.metadata)
        data_provider = self.coverage.slices[0].data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        self.processed += 1
        self.resumer.add_imported_data(data_provider)
        return file

    def _insert_into_wms(self):
        """
        Inserts the coverage into the wms service
        """
        try:
            request = WMSTFromWCSInsertRequest(self.coverage.coverage_id, False)
            ConfigManager.executor.execute(request)
        except Exception as e:
            log.error("Exception thrown when importing in WMS. "
                      "Please try to reimport in WMS manually.")
            raise e

    def _is_insert(self):
        """
        Returns true if the coverage should be inserted,
        false if only updates are needed
        :rtype: bool
        """
        cov = CoverageUtil(self.coverage.coverage_id)
        return not cov.exists()
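# --- Illustration: the half-pixel subset correction ---
# _get_update_subsets_for_slice above shrinks each numeric interval by half a
# grid resolution on each side, moving bounds from cell edges to cell centres.
# A minimal, self-contained numeric check of that arithmetic:
def corrected_interval(low, high, resolution):
    low += float(resolution) / 2
    if high is not None:
        high -= float(resolution) / 2
    return low, high


# a 4-cell axis spanning [0, 4] at resolution 1 becomes centre-aligned:
assert corrected_interval(0.0, 4.0, 1.0) == (0.5, 3.5)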
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
        The recipe class for map_mosaic. To get an overview of the ingredients
        needed for this recipe check ingredients/map_mosaic
        :param Session session: the session for this import
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())
        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()

    def validate(self):
        """
        Implementation of the base recipe validate method
        """
        super(Recipe, self).validate()
        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()
        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info("All files have been analyzed. Please verify that the axis subsets "
                 "of the first {} files above are correct.".format(number_of_files))
        index = 1
        for slice in slices:
            log.info("Slice " + str(index) + ": " + str(slice))
            index += 1

    def ingest(self):
        """
        Starts the ingesting process
        """
        importer = self._get_importer()
        importer.ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype (int, int)
        """
        return self._get_importer().get_progress()

    def _get_slices(self, crs):
        """
        Returns the slices for the collection of files given
        """
        files = self.session.get_files()
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)
        slices = []
        count = 1
        for file in files:
            # NOTE: skip any file already recorded in *.resume.json,
            # re-processing it is just wasted time
            if not self.resumer.is_file_imported(file.filepath):
                timer = Timer()
                # print which file is being analyzed
                FileUtil.print_feedback(count, len(files), file.filepath)
                if not FileUtil.validate_file_path(file.filepath):
                    continue
                valid_coverage_slice = True
                try:
                    subsets = GdalAxisFiller(crs_axes,
                                             GDALGmlUtil(file.get_filepath())).fill()
                except Exception as ex:
                    # If skip: true, ignore this file; otherwise raise the exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(
                        file.get_filepath(), ex)
                    valid_coverage_slice = False
                if valid_coverage_slice:
                    slices.append(Slice(subsets, FileDataProvider(file)))
                timer.print_elapsed_time()
                count += 1
        return slices

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(self.session.get_files())
        crs = gdal_dataset.get_crs()
        slices = self._get_slices(crs)
        fields = GdalRangeFieldsGenerator(gdal_dataset,
                                          self.options['band_names']).get_range_fields()
        coverage = Coverage(self.session.get_coverage_id(), slices, fields,
                            gdal_dataset.get_crs(), gdal_dataset.get_band_gdal_type(),
                            self.options['tiling'])
        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(),
                                     self.options['wms_import'],
                                     self.options['scale_levels'], False)
        return self.importer

    @staticmethod
    def get_name():
        return "map_mosaic"
class Recipe(GeneralCoverageRecipe):

    #
    # constants
    #

    RECIPE_NAME = "sentinel1"

    # coverage id scheme: S1_GRD_${modebeam}_${polarisation}
    # e.g: S1_GRD_IW/EW _HH,VV,VH,..

    # Sentinel 1 tiff pattern
    # e.g: s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff
    GRD_FILE_PATTERN = "(.*)-(.*)-grd-(.*)-(.*)-(.*)-(.*)-(.*)-(.*).tiff"
    grd_pattern = re.compile(GRD_FILE_PATTERN)

    # variables that can be used to template the coverage id
    VAR_MODEBEAM = '${modebeam}'
    VAR_POLARISATION = '${polarisation}'

    # 1 tiff file contains 1 band
    BAND = UserBand("1", "Grey", "", "", "", [0], "")

    DEFAULT_MODEBEAMS = ["EW", "IW"]
    DEFAULT_POLARISATIONS = ["HH", "HV", "VH", "VV"]

    # Sentinel 1 contains 1 band
    DEFAULT_BAND_DATA_TYPE = "UInt16"

    EPSG_XY_CRS = "$EPSG_XY_CRS"
    CRS_TEMPLATE = "OGC/0/AnsiDate@" + EPSG_XY_CRS

    DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING
    DEFAULT_NULL_VALUE = 0

    #
    # public
    #

    def __init__(self, session):
        super(Recipe, self).__init__(session)
        self._init_options()

    def validate(self):
        super(Recipe, self).validate()
        valid_files = []
        # Local validation of the input files
        for file in self.session.get_files():
            file_name = os.path.basename(file.get_filepath())
            if not bool(re.match(self.GRD_FILE_PATTERN, file_name)):
                log.warn("File '" + file.get_filepath() + "' is not a valid GRD "
                         "TIFF file, ignored for further processing.")
            else:
                valid_files.append(file)
        self.session.files = valid_files

    def describe(self):
        log.info("The recipe has been validated and is ready to run.")
        log.info(make_bold("Recipe: ") + self.session.get_recipe()['name'])
        log.info(make_bold("WCS Service: ") + ConfigManager.wcs_service)
        log.info(make_bold("Mocked: ") + str(ConfigManager.mock))
        if ConfigManager.track_files:
            log.info(make_bold("Track files: ") + str(ConfigManager.track_files))
        if ConfigManager.skip:
            log.info(make_bold("Skip: ") + str(ConfigManager.skip))
        if ConfigManager.retry:
            log.info(make_bold("Retries: ") + str(ConfigManager.retries))
        if ConfigManager.slice_restriction is not None:
            log.info(make_bold("Slice Restriction: ") +
                     str(ConfigManager.slice_restriction))
        multiimporter = self._get_importer()
        cov_num = len(multiimporter.importers)
        i = 1
        for importer in multiimporter.importers:
            log.info("Coverage {}/{} - {}: {} files.".format(
                i, cov_num, make_bold(importer.coverage.coverage_id),
                len(importer.coverage.slices)))
            i += 1

    def ingest(self):
        self._get_importer().ingest()

    def status(self):
        return self._get_importer().get_progress()

    #
    # private
    #

    def _init_options(self):
        self._init_coverage_options()
        self._init_input_options()
        self.coverage_id = self.session.get_coverage_id()
        self.import_order = self._set_option(self.options, "import_order",
                                             self.DEFAULT_IMPORT_ORDER)
        self.wms_import = self._set_option(self.options, "wms_import", False)
        self.scale_levels = self._set_option(self.options, "scale_levels", [])
        self.grid_cov = False

    def _init_coverage_options(self):
        covopts = self.options["coverage"]
        self._init_epsg_xy_crs()
        compound_crs = self.CRS_TEMPLATE.replace(self.EPSG_XY_CRS, self.epsg_xy_crs)
        self.crs = self._set_option(covopts, "crs", self._resolve_crs(compound_crs))
        self._set_option(covopts, "slicer", {})
        self._init_slicer_options(covopts)

    def _init_input_options(self):
        # specify a subset of modebeams/polarisations to ingest
        inputopts = self.session.get_input()
        self.modebeams = self._set_option(inputopts, "modebeams", self.DEFAULT_MODEBEAMS)
        self.polarisations = self._set_option(inputopts, "polarisations",
                                              self.DEFAULT_POLARISATIONS)

    def _init_slicer_options(self, covopts):
        sliceropts = covopts["slicer"]
        self._set_option(sliceropts, "type", "gdal")
        self._set_option(sliceropts, "pixelIsPoint", False)
        axesopts = self._init_axes_options()
        if "axes" in sliceropts:
            for axis in sliceropts["axes"]:
                for i in sliceropts["axes"][axis]:
                    axesopts[axis][i] = sliceropts["axes"][axis][i]
        sliceropts["axes"] = axesopts

    def _init_axes_options(self):
        epsg_xy_axes_labels = self.__get_epsg_xy_axes_labels()
        return {
            "ansi": {
                # e.g. s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff
                "min": "datetime(regex_extract('${file:name}', '" +
                       self.GRD_FILE_PATTERN + "', 4), 'YYYYMMDD')",
                "gridOrder": 0,
                "type": "ansidate",
                "irregular": True,
                "dataBound": False
            },
            epsg_xy_axes_labels[0]: {
                "min": "${gdal:minX}",
                "max": "${gdal:maxX}",
                "gridOrder": 1,
                "resolution": "${gdal:resolutionX}"
            },
            epsg_xy_axes_labels[1]: {
                "min": "${gdal:minY}",
                "max": "${gdal:maxY}",
                "gridOrder": 2,
                "resolution": "${gdal:resolutionY}"
            }
        }

    def _init_epsg_xy_crs(self):
        """
        Detect the EPSG code for the XY axes from the first input file
        """
        gdal_ds = GDALGmlUtil(self.session.get_files()[0].get_filepath())
        self.epsg_xy_crs = gdal_ds.get_crs()

    def __get_epsg_xy_axes_labels(self):
        """
        Return a tuple of axis labels for the X and Y axes
        """
        axes_labels = CRSUtil.get_axis_labels_from_single_crs(self.epsg_xy_crs)
        axis_type1 = CRSAxis.get_axis_type_by_name(axes_labels[0])
        # XY order (e.g: EPSG:3857)
        if axis_type1 == CRSAxis.AXIS_TYPE_X:
            return axes_labels[0], axes_labels[1]
        else:
            # YX order (e.g: EPSG:4326) needs the order swapped
            return axes_labels[1], axes_labels[0]

    def _set_option(self, opts, key, default_value):
        if key not in opts:
            opts[key] = default_value
        return opts[key]

    def _get_importer(self):
        if self.importer is None:
            self.importer = MultiImporter(self._get_importers())
        return self.importer

    def _get_importers(self):
        ret = []
        convertors = self._get_convertors()
        for cov_id, conv in convertors.iteritems():
            coverage_slices = conv.coverage_slices
            importer = Importer(conv.resumer, conv.to_coverage(coverage_slices),
                                self.wms_import, self.scale_levels, self.grid_cov)
            ret.append(importer)
        return ret

    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}
        for file in self.session.get_files():
            # Check that this file still exists when preparing to import
            if not FileUtil.validate_file_path(file.get_filepath()):
                continue
            # Determine which coverage id this file belongs to
            modebeam, polarisation = self._get_modebeam_polarisation(file.filepath)
            cov_id = self._get_coverage_id(self.coverage_id, modebeam, polarisation)
            # Skip files already recorded in coverage_id.resume.json
            self.resumer = Resumer(cov_id)
            if self.resumer.is_file_imported(file.filepath):
                continue
            conv = self._get_convertor(convertors, cov_id)
            file_pair = FilePair(file.filepath, file.filepath)
            conv.files = [file_pair]
            crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)
            # Each file carries a different datetime in its name
            evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                GdalToCoverageConverter.RECIPE_TYPE, file)
            conv.data_type = self.DEFAULT_BAND_DATA_TYPE
            slices = conv._create_coverage_slices(crs_axes, evaluator_slice)
            conv.coverage_slices += slices
        return convertors

    def _get_modebeam_polarisation(self, file_path):
        """
        A file is valid to import to the coverage S1_GRDH_EW_HH if its name matches
        a combination of resolution (e.g: H), modebeam (e.g: EW) and polarisation (e.g: HH)
        """
        # e.g: s1a-iw-grd-vh-20190326t171654-20190326t171719-026512-02f856-002.tiff
        file_name = os.path.basename(file_path)
        matcher = self.grd_pattern.match(file_name)
        tmp_modebeam = matcher.group(2)
        tmp_polarisation = matcher.group(3)
        return tmp_modebeam.upper(), tmp_polarisation.upper()

    def _get_coverage_id(self, cov_id, modebeam, polarisation):
        return cov_id.replace(self.VAR_MODEBEAM, modebeam) \
                     .replace(self.VAR_POLARISATION, polarisation)

    def _get_convertor(self, convertors, cov_id):
        if cov_id not in convertors:
            convertors[cov_id] = self._create_convertor(cov_id)
        return convertors[cov_id]

    def _create_convertor(self, cov_id):
        recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())
        files = []
        bands_metadata_fields = {}
        axis_metadata_fields = {}
        default_null_values = [self.DEFAULT_NULL_VALUE]
        return GdalToCoverageConverter(
            self.resumer, default_null_values, recipe_type, sentence_evaluator,
            cov_id, [self.BAND], files, self.crs, self._read_axes(self.crs),
            self.options['tiling'], self._global_metadata_fields(),
            self._local_metadata_fields(), bands_metadata_fields,
            axis_metadata_fields, self._metadata_type(), self.grid_cov,
            self.import_order)

    @staticmethod
    def get_name():
        return Recipe.RECIPE_NAME
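# --- Illustration: file name parsing and coverage id templating ---
# A self-contained check of the two string manipulations above: grd_pattern
# extracts modebeam/polarisation from a GRD file name (the sample name is the
# one quoted in the class comments), and _get_coverage_id is plain substitution
# of ${modebeam}/${polarisation} in the id template.
import re

GRD_FILE_PATTERN = "(.*)-(.*)-grd-(.*)-(.*)-(.*)-(.*)-(.*)-(.*).tiff"
name = "s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff"
m = re.match(GRD_FILE_PATTERN, name)
modebeam, polarisation = m.group(2).upper(), m.group(3).upper()
assert (modebeam, polarisation) == ("IW", "VH")

cov_id = "S1_GRD_${modebeam}_${polarisation}" \
    .replace("${modebeam}", modebeam) \
    .replace("${polarisation}", polarisation)
assert cov_id == "S1_GRD_IW_VH"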
class Recipe(GeneralCoverageRecipe):

    #
    # constants
    #

    RECIPE_NAME = "sentinel2"

    # supported product levels
    LVL_L1C = 'L1C'
    LVL_L2A = 'L2A'
    LEVELS = [LVL_L1C, LVL_L2A]

    # resolutions in a single Sentinel 2 dataset; TCI (True Color Image) is 10m
    RES_10m = '10m'
    RES_20m = '20m'
    RES_60m = '60m'
    RES_TCI = 'TCI'
    # resolution (subdataset name) -> actual resolution numbers
    RES_DICT = {
        RES_10m: [1, 10, -10],
        RES_20m: [1, 20, -20],
        RES_60m: [1, 60, -60],
        RES_TCI: [1, 10, -10]
    }
    # list of subdatasets to import
    SUBDATASETS = [RES_10m, RES_20m, RES_60m, RES_TCI]

    # variables that can be used to template the coverage id
    VAR_CRS_CODE = '${crsCode}'
    VAR_RESOLUTION = '${resolution}'
    VAR_LEVEL = '${level}'

    # bands for each resolution
    BANDS_L1C = {
        RES_10m: [
            UserBand("1", "B4", "red, central wavelength 665 nm", "", "", [0], ""),
            UserBand("2", "B3", "green, central wavelength 560 nm", "", "", [0], ""),
            UserBand("3", "B2", "blue, central wavelength 490 nm", "", "", [0], ""),
            UserBand("4", "B8", "nir, central wavelength 842 nm", "", "", [0], "")
        ],
        RES_20m: [
            UserBand("1", "B5", "central wavelength 705 nm", "", "", [0], ""),
            UserBand("2", "B6", "central wavelength 740 nm", "", "", [0], ""),
            UserBand("3", "B7", "central wavelength 783 nm", "", "", [0], ""),
            UserBand("4", "B8A", "central wavelength 865 nm", "", "", [0], ""),
            UserBand("5", "B11", "central wavelength 1610 nm", "", "", [0], ""),
            UserBand("6", "B12", "central wavelength 2190 nm", "", "", [0], "")
        ],
        RES_60m: [
            UserBand("1", "B1", "central wavelength 443 nm", "", "", [0], ""),
            UserBand("2", "B9", "central wavelength 945 nm", "", "", [0], ""),
            UserBand("3", "B10", "central wavelength 1375 nm", "", "", [0], "")
        ],
        RES_TCI: [
            UserBand("1", "red", "B4, central wavelength 665 nm", "", "", [0], ""),
            UserBand("2", "green", "B3, central wavelength 560 nm", "", "", [0], ""),
            UserBand("3", "blue", "B2, central wavelength 490 nm", "", "", [0], "")
        ],
    }
    # L2A is the same as L1C but doesn't have B10 in the 60m subdataset
    BANDS_L2A = {
        RES_10m: BANDS_L1C[RES_10m],
        RES_20m: BANDS_L1C[RES_20m],
        RES_60m: [
            UserBand("1", "B1", "central wavelength 443 nm", "", "", [0], "nm"),
            UserBand("2", "B9", "central wavelength 945 nm", "", "", [0], "nm"),
        ],
        RES_TCI: BANDS_L1C[RES_TCI],
    }
    BANDS = {LVL_L1C: BANDS_L1C, LVL_L2A: BANDS_L2A}

    DEFAULT_CRS = "OGC/0/AnsiDate@EPSG/0/${crsCode}"
    DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING
    DEFAULT_NULL_VALUE = 0

    #
    # public
    #

    def __init__(self, session):
        super(Recipe, self).__init__(session)
        self._init_options()

    def validate(self):
        super(Recipe, self).validate()
        if len(self.resolutions) == 0:
            raise RecipeValidationException("No resolutions to import provided.")
        for res in self.resolutions:
            if res not in self.SUBDATASETS:
                raise RecipeValidationException(
                    "Invalid resolution '" + str(res) +
                    "' provided, expected a subset of " + str(self.SUBDATASETS))
        for lvl in self.levels:
            if lvl not in self.LEVELS:
                raise RecipeValidationException(
                    "Invalid level '" + str(lvl) +
                    "' provided, expected a subset of " + str(self.LEVELS))

    def describe(self):
        log.info("The recipe has been validated and is ready to run.")
        log.info(make_bold("Recipe: ") + self.session.get_recipe()['name'])
        log.info(make_bold("WCS Service: ") + ConfigManager.wcs_service)
        log.info(make_bold("Mocked: ") + str(ConfigManager.mock))
        if ConfigManager.track_files:
            log.info(make_bold("Track files: ") + str(ConfigManager.track_files))
        if ConfigManager.skip:
            log.info(make_bold("Skip: ") + str(ConfigManager.skip))
        if ConfigManager.retry:
            log.info(make_bold("Retries: ") + str(ConfigManager.retries))
        if ConfigManager.slice_restriction is not None:
            log.info(make_bold("Slice Restriction: ") +
                     str(ConfigManager.slice_restriction))
        multiimporter = self._get_importer()
        cov_num = len(multiimporter.importers)
        i = 1
        for importer in multiimporter.importers:
            log.info("Coverage {}/{} - {}: {} files.".format(
                i, cov_num, make_bold(importer.coverage.coverage_id),
                len(importer.coverage.slices)))
            i += 1

    def ingest(self):
        self._get_importer().ingest()

    def status(self):
        return self._get_importer().get_progress()

    #
    # private
    #

    def _init_options(self):
        self._init_coverage_options()
        self._init_input_options()
        self.coverage_id = self.session.get_coverage_id()
        self.import_order = self._set_option(self.options, 'import_order',
                                             self.DEFAULT_IMPORT_ORDER)
        self.wms_import = self._set_option(self.options, 'wms_import', False)
        self.scale_levels = self._set_option(self.options, 'scale_levels', [])
        self.grid_cov = False

    def _init_coverage_options(self):
        covopts = self.options['coverage']
        self.crs = self._set_option(covopts, 'crs', self.DEFAULT_CRS)
        self._set_option(covopts, 'slicer', {})
        self._init_slicer_options(covopts)

    def _init_input_options(self):
        # specify a subset of resolutions to ingest
        inputopts = self.session.get_input()
        self.resolutions = self._set_option(inputopts, 'resolutions', None)
        if self.resolutions is None:
            self.resolutions = self._set_option(inputopts, 'subdatasets', None)
        if self.resolutions is None:
            self.resolutions = self.SUBDATASETS
        # allow ingesting only data in particular CRSs
        self.crss = self._set_option(inputopts, 'crss', [])
        # only ingest data of the specified levels
        self.levels = self._set_option(inputopts, 'levels', [])

    def _init_slicer_options(self, covopts):
        sliceropts = covopts['slicer']
        self._set_option(sliceropts, 'type', 'gdal')
        self._set_option(sliceropts, 'pixelIsPoint', False)
        axesopts = self._init_axes_options()
        if 'axes' in sliceropts:
            for axis in sliceropts['axes']:
                if axis not in axesopts:
                    raise RecipeValidationException(
                        "Invalid axis '" + axis + "', expected one of ansi/E/N.")
                for k in sliceropts['axes'][axis]:
                    axesopts[axis][k] = sliceropts['axes'][axis][k]
        sliceropts['axes'] = axesopts

    def _init_axes_options(self):
        return {
            'ansi': {
                "min": "datetime(regex_extract('${file:path}', "
                       "'.*?/S2[^_]+_MSI[^_]+_([\\d]+)T[\\d]+_', 1), 'YYYYMMDD')",
                "gridOrder": 0,
                "type": "ansidate",
                "irregular": True,
                "dataBound": False
            },
            'E': {
                "min": "${gdal:minX}",
                "max": "${gdal:maxX}",
                "gridOrder": 1,
                "resolution": "${gdal:resolutionX}"
            },
            'N': {
                "min": "${gdal:minY}",
                "max": "${gdal:maxY}",
                "gridOrder": 2,
                "resolution": "${gdal:resolutionY}"
            }
        }

    def _set_option(self, opts, key, default_value):
        if key not in opts:
            opts[key] = default_value
        return opts[key]

    def _get_importer(self):
        if self.importer is None:
            self.importer = MultiImporter(self._get_importers())
        return self.importer

    def _get_importers(self):
        ret = []
        convertors = self._get_convertors()
        for cov_id, conv in convertors.iteritems():
            coverage_slices = conv.coverage_slices
            importer = Importer(conv.resumer, conv.to_coverage(coverage_slices),
                                self.wms_import, self.scale_levels, self.grid_cov)
            ret.append(importer)
        return ret

    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}
        for f in self.session.get_files():
            # The original file does not contain any information for geo bounds;
            # that comes from its subdatasets
            if not FileUtil.validate_file_path(f.get_filepath()):
                continue
            gdal_ds = GDALGmlUtil(f.get_filepath())
            subdatasets = self._get_subdatasets(gdal_ds, f)
            gdal_ds.close()
            level = self._get_level(f.get_filepath())
            if len(self.levels) > 0 and level not in self.levels:
                # skip the file, as it's not in the list of levels provided in the ingredients file
                log.debug("Skipping " + level + " data")
                continue
            crs_code = ""
            evaluator_slice = None
            for res in self.resolutions:
                subds_file = self._get_subdataset_file(subdatasets, res)
                crs_code = self._get_crs_code(subds_file.get_filepath(), crs_code)
                if len(self.crss) > 0 and crs_code not in self.crss:
                    # skip the CRS, it's not in the list of CRSs provided in the ingredients file
                    log.debug("Skipping data with CRS " + crs_code)
                    continue
                cov_id = self._get_coverage_id(self.coverage_id, crs_code, level, res)
                # Skip files already recorded in coverage_id.resume.json
                self.resumer = Resumer(cov_id)
                if self.resumer.is_file_imported(f.filepath):
                    continue
                conv = self._get_convertor(convertors, cov_id, crs_code, level, res)
                file_pair = FilePair(subds_file.filepath, f.filepath)
                conv.files = [file_pair]
                crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)
                if evaluator_slice is None:
                    # The subdataset contains the information for geo bounds
                    evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                        GdalToCoverageConverter.RECIPE_TYPE, subds_file)
                # Resolutions 10m, 20m and 60m have the same data type (UInt16),
                # while TCI has data type Byte
                if res == self.RES_TCI:
                    conv.data_type = "Byte"
                else:
                    conv.data_type = "UInt16"
                # Fixed resolutions for the 3 axes of a Sentinel 2 coverage
                axis_resolutions = self.RES_DICT[res]
                slices = conv._create_coverage_slices(crs_axes, evaluator_slice,
                                                      axis_resolutions)
                conv.coverage_slices += slices
        return convertors

    def _get_subdatasets(self, gdal_ds, f):
        subdatasets = gdal_ds.get_subdatasets()
        if len(subdatasets) != len(self.SUBDATASETS):
            raise RuntimeException(
                "Cannot handle Sentinel 2 file " + f.get_filepath() +
                ": GDAL reported " + str(len(subdatasets)) +
                " subdatasets, expected " + str(len(self.SUBDATASETS)) + ".")
        return [name for (name, _) in subdatasets]

    def _get_subdataset_file(self, subdatasets, res):
        check = ":" + res + ":"
        for name in subdatasets:
            if check in name:
                return File(name)
        # else not found
        raise RuntimeException("Resolution (string ':" + res +
                               ":') not found in subdatasets: " + str(subdatasets))

    def _get_crs_code(self, subds, crs_code):
        """
        Return the <crs_code> from subds of the form:
        SENTINEL2_<level>:<file>:<resolution>:EPSG_<crs_code>
        """
        if crs_code == "":
            parts = subds.split(":EPSG_")
            if len(parts) != 2:
                raise RuntimeException(
                    "Cannot determine EPSG code from subdataset " + subds)
            return parts[1]
        return crs_code

    def _get_level(self, file_path):
        if '_MSIL1C_' in file_path:
            return self.LVL_L1C
        elif '_MSIL2A_' in file_path:
            return self.LVL_L2A
        else:
            log.warn("Cannot determine level from collected file: " + file_path +
                     "; assuming L1C.")
            return self.LVL_L1C

    def _get_coverage_id(self, cov_id, crs_code, level, resolution):
        return cov_id.replace(self.VAR_CRS_CODE, crs_code) \
                     .replace(self.VAR_LEVEL, level) \
                     .replace(self.VAR_RESOLUTION, resolution)

    def _get_convertor(self, convertors, cov_id, crs_code, level, res):
        if cov_id not in convertors:
            convertors[cov_id] = self._create_convertor(convertors, cov_id,
                                                        crs_code, level, res)
        return convertors[cov_id]

    def _create_convertor(self, convertors, cov_id, crs_code, level, res):
        recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())
        files = []
        crs = self._get_crs(crs_code)
        bands_metadata_fields = {}
        axis_metadata_fields = {}
        default_null_values = [self.DEFAULT_NULL_VALUE]
        return GdalToCoverageConverter(
            self.resumer, default_null_values, recipe_type, sentence_evaluator,
            cov_id, self.BANDS[level][res], files, crs, self._read_axes(crs),
            self.options['tiling'], self._global_metadata_fields(),
            self._local_metadata_fields(), bands_metadata_fields,
            axis_metadata_fields, self._metadata_type(), self.grid_cov,
            self.import_order)

    def _get_crs(self, crs_code):
        crs = self.crs.replace(self.VAR_CRS_CODE, crs_code)
        return self._resolve_crs(crs)

    @staticmethod
    def get_name():
        return Recipe.RECIPE_NAME
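# --- Illustration: extracting the EPSG code from a subdataset name ---
# _get_crs_code above relies on GDAL's Sentinel 2 subdataset naming,
# SENTINEL2_<level>:<file>:<resolution>:EPSG_<crs_code>; the sample name below
# is invented following that pattern.
subds = "SENTINEL2_L1C:/data/S2A_MSIL1C_20190324.zip:10m:EPSG_32632"
parts = subds.split(":EPSG_")
assert len(parts) == 2 and parts[1] == "32632"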
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
        The recipe class for regular timeseries. To get an overview of the
        ingredients needed for this recipe check ingredients/time_series_regular
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())
        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()

    def validate(self):
        super(Recipe, self).validate()
        if "time_crs" not in self.options or self.options['time_crs'] == "":
            raise RecipeValidationException("No valid time crs provided")
        if 'time_start' not in self.options:
            raise RecipeValidationException("No valid time start parameter provided")
        if 'time_step' not in self.options:
            raise RecipeValidationException(
                "You have to provide a valid time step indicating both the value "
                "and the unit of time")
        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()
        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info("All files have been analyzed. Please verify that the axis subsets "
                 "of the first {} files above are correct.".format(number_of_files))
        index = 1
        for slice in slices:
            log.info("Slice " + str(index) + ": " + str(slice))
            index += 1

    def ingest(self):
        """
        Ingests the input files
        """
        self._get_importer().ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype (int, int)
        """
        return self._get_importer().get_progress()

    def _generate_timeseries_tuples(self, limit=None):
        """
        Generates the timeseries tuples from the original files based on the recipe,
        and sorts the files in order of time.
        :rtype: list[TimeFileTuple]
        """
        ret = []
        if limit is None:
            limit = len(self.session.get_files())
        time_offset = 0
        time_format = self.options['time_format'] \
            if self.options['time_format'] != "auto" else None
        time_start = DateTimeUtil(self.options['time_start'], time_format,
                                  self.options['time_crs'])
        for tfile in self.session.get_files():
            if len(ret) == limit:
                break
            time_tuple = TimeFileTuple(
                self._get_datetime_with_step(time_start, time_offset), tfile)
            ret.append(time_tuple)
            time_offset += 1
        # Sort the coverage slices by datetime before importing
        # (ascending by default, descending if import_order says so)
        if self.options["import_order"] == AbstractToCoverageConverter.IMPORT_ORDER_DESCENDING:
            return sorted(ret, reverse=True)
        return sorted(ret)

    def _get_datetime_with_step(self, current, offset):
        """
        Returns the new datetime
        :param DateTimeUtil current: the date to add the step to
        :param int offset: the number of steps to make
        """
        days, hours, minutes, seconds = tuple(
            [offset * item for item in self._get_real_step()])
        return DateTimeUtil(
            current.datetime.replace(days=+days, hours=+hours, minutes=+minutes,
                                     seconds=+seconds).isoformat(),
            None, self.options['time_crs'])

    def _get_real_step(self):
        res = re.search(
            "([0-9]*[\s]*days)?[\s]*"
            "([0-9]*[\s]*hours)?[\s]*"
            "([0-9]*[\s]*minutes)?[\s]*"
            "([0-9]*[\s]*seconds)?[\s]*", self.options['time_step'])
        days_s = res.group(1)
        hours_s = res.group(2)
        minutes_s = res.group(3)
        seconds_s = res.group(4)
        if days_s is None and hours_s is None and minutes_s is None and seconds_s is None:
            raise RuntimeException(
                'The time step does not have a valid unit of measure. '
                'Example of a valid time step: 1 days 2 hours 10 seconds')
        days = int(days_s.replace("days", "").strip()) if days_s is not None else 0
        hours = int(hours_s.replace("hours", "").strip()) if hours_s is not None else 0
        minutes = int(minutes_s.replace("minutes", "").strip()) if minutes_s is not None else 0
        seconds = int(seconds_s.replace("seconds", "").strip()) if seconds_s is not None else 0
        return days, hours, minutes, seconds

    def _get_slices(self, crs):
        """
        Returns the slices for the collection of files given
        """
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)
        slices = []
        timeseries = self._generate_timeseries_tuples()
        count = 1
        for tpair in timeseries:
            file_path = tpair.file.get_filepath()
            # NOTE: skip any file already recorded in *.resume.json,
            # re-processing it is just wasted time
            if not self.resumer.is_file_imported(file_path):
                timer = Timer()
                # print which file is being analyzed
                FileUtil.print_feedback(count, len(timeseries), file_path)
                if not FileUtil.validate_file_path(file_path):
                    continue
                valid_coverage_slice = True
                try:
                    subsets = GdalAxisFiller(crs_axes, GDALGmlUtil(file_path)).fill(True)
                    subsets = self._fill_time_axis(tpair, subsets)
                except Exception as ex:
                    # If skip: true, ignore this file; otherwise raise the exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(file_path, ex)
                    valid_coverage_slice = False
                if valid_coverage_slice:
                    slices.append(Slice(subsets, FileDataProvider(tpair.file)))
                timer.print_elapsed_time()
                count += 1
        return slices

    def _fill_time_axis(self, tpair, subsets):
        """
        Fills the time axis parameters
        :param TimeFileTuple tpair: the input pair
        :param list[AxisSubset] subsets: the axis subsets for the tpair
        """
        days, hours, minutes, seconds = self._get_real_step()
        number_of_days = days + hours / float(24) + minutes / float(60 * 24) \
            + seconds / float(60 * 60 * 24)
        for i in range(0, len(subsets)):
            if subsets[i].coverage_axis.axis.crs_axis is not None \
                    and subsets[i].coverage_axis.axis.crs_axis.is_time_axis():
                subsets[i].coverage_axis.axis = RegularAxis(
                    subsets[i].coverage_axis.axis.label,
                    subsets[i].coverage_axis.axis.uomLabel,
                    tpair.time.to_string(), tpair.time.to_string(),
                    tpair.time.to_string(),
                    subsets[i].coverage_axis.axis.crs_axis)
                subsets[i].coverage_axis.grid_axis.resolution = number_of_days
                subsets[i].interval.low = tpair.time.to_string()
        return subsets

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(self.session.get_files())
        crs = CRSUtil.get_compound_crs([self.options['time_crs'], gdal_dataset.get_crs()])
        slices = self._get_slices(crs)
        fields = GdalRangeFieldsGenerator(gdal_dataset,
                                          self.options['band_names']).get_range_fields()
        coverage = Coverage(self.session.get_coverage_id(), slices, fields, crs,
                            gdal_dataset.get_band_gdal_type(), self.options['tiling'])
        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(),
                                     self.options['wms_import'],
                                     self.options['scale_levels'])
        return self.importer

    @staticmethod
    def get_name():
        return "time_series_regular"
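# --- Illustration: parsing the time_step option ---
# A standalone re-implementation of _get_real_step's regex parsing, to show
# what the free-form "N days N hours ..." strings yield.
import re


def parse_time_step(time_step):
    res = re.search(r"([0-9]*[\s]*days)?[\s]*"
                    r"([0-9]*[\s]*hours)?[\s]*"
                    r"([0-9]*[\s]*minutes)?[\s]*"
                    r"([0-9]*[\s]*seconds)?[\s]*", time_step)
    units = ["days", "hours", "minutes", "seconds"]
    return tuple(int(res.group(i + 1).replace(u, "").strip())
                 if res.group(i + 1) else 0
                 for i, u in enumerate(units))


assert parse_time_step("1 days 2 hours 10 seconds") == (1, 2, 0, 10)
assert parse_time_step("7 days") == (7, 0, 0, 0)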
class Importer:
    def __init__(self, coverage, insert_into_wms=False, scale_levels=None, grid_coverage=False):
        """
        Imports a coverage into wcst
        :param Coverage coverage: the coverage to be imported
        """
        self.coverage = coverage
        self.resumer = Resumer(coverage.coverage_id)
        self.coverage.slices = SliceRestricter(
            self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
        self.processed = 0
        self.total = len(coverage.slices)
        self.insert_into_wms = insert_into_wms
        self.scale_levels = scale_levels
        self.grid_coverage = grid_coverage

    def ingest(self):
        """
        Ingests the given coverage
        """
        if len(self.coverage.slices) > 0:
            if self._is_insert():
                self._initialize_coverage()
            # Insert the remaining slices
            self._insert_slices()
        if self.insert_into_wms:
            self._insert_update_into_wms()

    def get_progress(self):
        """
        Returns the progress of the import
        :rtype: tuple
        """
        if self.total == 0:
            log.warn("No slices to import.")
            return -1, -1
        return self.processed, self.total

    def _insert_slice(self, current):
        """
        Inserts one slice
        :param Slice current: the slice to be imported
        """
        current_exception = None
        current_str = ""
        for attempt in range(0, ConfigManager.retries):
            try:
                current_str = str(current)
                file = self._generate_gml_slice(current)
                subsets = self._get_update_subsets_for_slice(current)
                request = WCSTUpdateRequest(self.coverage.coverage_id, file.get_url(),
                                            subsets, ConfigManager.insitu)
                executor = ConfigManager.executor
                executor.execute(request, mock=ConfigManager.mock)
                file.release()
                self.resumer.add_imported_data(current.data_provider)
            except Exception as e:
                log.warn("\nException thrown when trying to insert slice: \n" +
                         current_str + "Retrying, you can safely ignore the warning "
                         "for now. Tried " + str(attempt + 1) + " times.\n")
                current_exception = e
                sleep(ConfigManager.retry_sleep)
            else:
                break
        else:
            log.warn("\nFailed to insert slice. Attempted " +
                     str(ConfigManager.retries) + " times.")
            raise current_exception

    def get_slices_for_description(self):
        """
        Returns a list with the first slices to be used in the import description
        :rtype: list[Slice]
        """
        slices = []
        # Print all slices if there are fewer than the configured maximum,
        # otherwise only the first ones
        max_slices = ConfigManager.description_max_no_slices \
            if ConfigManager.description_max_no_slices < len(self.coverage.slices) \
            else len(self.coverage.slices)
        for i in range(0, max_slices):
            slices.append(self.coverage.slices[i])
        return slices

    def _insert_slices(self):
        """
        Insert the slices of the coverage
        """
        is_loggable = True
        is_ingest_file = True
        file_name = ""
        log_file = None
        try:
            log_file = open(ConfigManager.resumer_dir_path + "/" +
                            ConfigManager.ingredient_file_name + ".log", "a+")
            log_file.write("\n--------------------------------------------------"
                           "-----------------------------------")
            log_file.write("\nIngesting coverage '" + self.coverage.coverage_id + "'...")
        except Exception as e:
            is_loggable = False
            log.warn("\nCannot create a log file for this ingestion process, "
                     "logging to console only.")
        for i in range(self.processed, self.total):
            try:
                # Log the time needed to send each slice (file) to the server.
                # NOTE: the wcs_extract recipe fetches its input from the server,
                # so in that case the file size is unknown
                if hasattr(self.coverage.slices[i].data_provider, "file"):
                    file_path = self.coverage.slices[i].data_provider.file.filepath
                    file_size_in_mb = round(float(os.path.getsize(file_path)) / (1000 * 1000), 2)
                    file_name = os.path.basename(file_path)
                    start_time = time.time()
                    self._insert_slice(self.coverage.slices[i])
                    end_time = time.time()
                    time_to_ingest = round(end_time - start_time, 2)
                    if time_to_ingest < 0.0000001:
                        time_to_ingest = 0.0000001
                    size_per_second = round(file_size_in_mb / time_to_ingest, 2)
                    log_text = "\nFile '" + file_name + "' with size " + \
                               str(file_size_in_mb) + " MB; Total time to ingest " + \
                               str(time_to_ingest) + "s @ " + str(size_per_second) + " MB/s."
                    # write to console
                    log.info(log_text)
                    if is_loggable:
                        # write to log file
                        log_file.write(log_text)
                else:
                    is_ingest_file = False
                    # extract a coverage from petascope to ingest a new coverage
                    start_time = time.time()
                    self._insert_slice(self.coverage.slices[i])
                    end_time = time.time()
                    time_to_ingest = round(end_time - start_time, 2)
                    log.info("\nTotal time to ingest: " + str(time_to_ingest) + " s.")
            except Exception as e:
                if ConfigManager.skip:
                    log.warn("Skipped slice " + str(self.coverage.slices[i]))
                    if is_loggable and is_ingest_file:
                        log_file.write("\nSkipped file: " + file_name + ".")
                        log_file.write("\nReason: " + str(e))
                else:
                    if is_loggable and is_ingest_file:
                        log_file.write("\nError file: " + file_name + ".")
                        log_file.write("\nReason: " + str(e))
                        log_file.write("\nResult: failed.")
                        log_file.close()
                    raise e
            self.processed += 1
        if is_loggable:
            log_file.write("\nResult: success.")
            log_file.close()

    def _initialize_coverage(self):
        """
        Initializes the coverage
        """
        file = self._generate_initial_gml_slice()
        request = WCSTInsertRequest(file.get_url(), False,
                                    self.coverage.pixel_data_type, self.coverage.tiling)
        executor = ConfigManager.executor
        current_insitu_value = executor.insitu
        executor.insitu = None
        executor.execute(request, mock=ConfigManager.mock)
        executor.insitu = current_insitu_value
        file.release()
        # If scale_levels are specified in the ingredients file, send the queries
        # to Petascope to create the downscaled collections
        if self.scale_levels:
            # Levels must be in ascending order
            sorted_list = sorted(self.scale_levels)
            # NOTE: each level is processed separately, with one HTTP request per level
            for level in sorted_list:
                request = WCSTInsertScaleLevelsRequest(self.coverage.coverage_id, level)
                executor.execute(request, mock=ConfigManager.mock)

    def _get_update_subsets_for_slice(self, slice):
        """
        Returns the given slice's interval as a list of wcst subsets
        :param slice: the slice for which to generate this
        :rtype: list[WCSTSubset]
        """
        subsets = []
        for axis_subset in slice.axis_subsets:
            low = axis_subset.interval.low
            high = axis_subset.interval.high
            if ConfigManager.subset_correction and high is not None \
                    and low != high and type(low) == str:
                # Time axis with values of type str (e.g: "1970-01-01T02:03:06Z")
                time_seconds = 1
                # AnsiDate (need to convert from days to seconds)
                if axis_subset.coverage_axis.axis.crs_axis.is_uom_day():
                    time_seconds = DateTimeUtil.DAY_IN_SECONDS
                low = decimal.Decimal(str(arrow.get(low).float_timestamp)) + \
                    decimal.Decimal(str(axis_subset.coverage_axis.grid_axis.resolution
                                        * time_seconds)) / 2
                low = DateTimeUtil.get_datetime_iso(low)
                if high is not None:
                    high = decimal.Decimal(str(arrow.get(high).float_timestamp)) - \
                        decimal.Decimal(str(axis_subset.coverage_axis.grid_axis.resolution
                                            * time_seconds)) / 2
                    high = DateTimeUtil.get_datetime_iso(high)
            elif ConfigManager.subset_correction and high is not None \
                    and low != high and type(low) != str:
                # regular axes (e.g: latitude, longitude, index1d)
                low = decimal.Decimal(str(low)) + \
                    decimal.Decimal(str(axis_subset.coverage_axis.grid_axis.resolution)) / 2
                if high is not None:
                    high = decimal.Decimal(str(high)) - \
                        decimal.Decimal(str(axis_subset.coverage_axis.grid_axis.resolution)) / 2
            subsets.append(WCSTSubset(axis_subset.coverage_axis.axis.label, low, high))
        return subsets

    def _get_update_crs(self, slice, crs):
        """
        Returns the crs corresponding to the axes that are data bound
        :param slice: the slice for which the gml should be created
        :param crs: the crs of the coverage
        :return: String
        """
        crsAxes = []
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                crsAxes.append(axis_subset.coverage_axis.axis.crs_axis)
        crsUtil = CRSUtil(crs)
        return crsUtil.get_crs_for_axes(crsAxes)

    def _generate_gml_slice(self, slice):
        """
        Generates the gml for a regular slice
        :param slice: the slice for which the gml should be created
        :rtype: File
        """
        metadata_provider = MetadataProvider(
            self.coverage.coverage_id, self._get_update_axes(slice),
            self.coverage.range_fields, self._get_update_crs(slice, self.coverage.crs),
            None, self.grid_coverage)
        data_provider = slice.data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _get_update_axes(self, slice):
        """
        Returns the axes for the slices that are bound to the data
        (e.g. Lat and Long for a 2-D raster)
        :param slice: the slice for which the gml should be created
        :rtype: dict[Axis, GridAxis]
        """
        axes = OrderedDict()
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                axes[axis_subset.coverage_axis.axis] = axis_subset.coverage_axis.grid_axis
        return axes

    def _generate_initial_gml_slice(self):
        """
        Returns the initial slice in gml format
        :rtype: File
        """
        return self._generate_initial_gml_db()

    def _generate_initial_gml_db(self):
        """
        Generates the initial slice in gml for importing using the database method
        and returns the gml for it
        :rtype: File
        """
        # Transform the axes domains such that only a point is defined.
        # For the first slice we need to import a single point, which will
        # then be updated with the real data
        axes_map = OrderedDict()
        for axis, grid_axis in self.coverage.get_insert_axes().iteritems():
            if axis.coefficient is not None:
                assert type(axis.coefficient) == list, "Axis coefficients not of type list."
                assert len(axis.coefficient) > 0, "The list of coefficients is empty."
                # Take the first coefficient of the irregular axis to create an initial slice
                axis = IrregularAxis(axis.label, axis.uomLabel, axis.low, axis.high,
                                     axis.origin, [axis.coefficient[0]], axis.crs_axis)
            axes_map[axis] = GridAxis(grid_axis.order, grid_axis.label,
                                      grid_axis.resolution, 0, 0)
        metadata_provider = MetadataProvider(
            self.coverage.coverage_id, axes_map, self.coverage.range_fields,
            self.coverage.crs, self.coverage.metadata, self.grid_coverage)
        tuple_list = ",".join(['0'] * len(self.coverage.range_fields))
        data_provider = TupleListDataProvider(tuple_list)
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _generate_initial_gml_insitu(self):
        """
        Generates the initial slice in gml for importing using the insitu method
        and returns the gml file for it
        :rtype: File
        """
        metadata_provider = MetadataProvider(
            self.coverage.coverage_id, self.coverage.get_insert_axes(),
            self.coverage.range_fields, self.coverage.crs, self.coverage.metadata,
            self.grid_coverage)
        data_provider = self.coverage.slices[0].data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        self.processed += 1
        self.resumer.add_imported_data(data_provider)
        return file

    def _insert_update_into_wms(self):
        """
        Inserts or updates the coverage in the wms service
        """
        try:
            # First, check in the WMS GetCapabilities whether a layer named after
            # the coverage id already exists
            request = WMSTGetCapabilities()
            response = ConfigManager.executor.execute(request)
            root_element = etree.fromstring(response)
            namespace = {"wms": "http://www.opengis.net/wms"}
            exist = root_element.xpath(
                "//wms:Capability/wms:Layer/wms:Layer/wms:Name/text()='" +
                self.coverage.coverage_id + "'", namespaces=namespace)
            if not exist:
                # The WMS layer does not exist; insert a new WMS layer
                # from the imported coverage
                request = WMSTFromWCSInsertRequest(self.coverage.coverage_id, False)
            else:
                # The WMS layer exists; update it from the updated coverage
                request = WMSTFromWCSUpdateRequest(self.coverage.coverage_id, False)
            ConfigManager.executor.execute(request, mock=ConfigManager.mock)
        except Exception as e:
            log.error("Exception thrown when importing in WMS. "
                      "Please try to reimport in WMS manually.")
            raise e

    def _is_insert(self):
        """
        Returns true if the coverage should be inserted,
        false if only updates are needed
        :rtype: bool
        """
        cov = CoverageUtil(self.coverage.coverage_id)
        return not cov.exists()
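# --- Illustration: the time-axis branch of the subset correction ---
# The AnsiDate branch in _get_update_subsets_for_slice converts an ISO
# timestamp to epoch seconds, shifts it by half a resolution (scaled to
# seconds per day), and converts back. A compressed sketch using arrow
# directly; DateTimeUtil.get_datetime_iso is stood in here by arrow's
# isoformat(), which is an approximation.
import decimal
import arrow

DAY_IN_SECONDS = 86400


def shift_time_low(low_iso, resolution_in_days):
    seconds = decimal.Decimal(str(arrow.get(low_iso).float_timestamp))
    half_step = decimal.Decimal(str(resolution_in_days * DAY_IN_SECONDS)) / 2
    return arrow.get(float(seconds + half_step)).isoformat()


# shifting the lower bound of a 1-day-resolution time axis by half a day:
print(shift_time_low("1970-01-02T00:00:00Z", 1))  # 1970-01-02T12:00:00+00:00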