Example #1
 def filter_imported_files(self):
     """
     Filter out files already imported according to coverage_id.resume.json
     """
     resumer = Resumer(self.coverage_id)
     not_imported_files = resumer.get_not_imported_files(self.files)
     self.files = not_imported_files
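
All of the examples on this page construct a Resumer from a coverage id. As a reading aid, here is a minimal stand-in for the interface these call sites rely on — a sketch inferred from the calls in the snippets (get_not_imported_files, is_file_imported), assuming the resume file holds a JSON list of already-imported file paths; the real class in rasdaman's wcst_import tracks richer data-provider records.

import json
import os

class ResumerSketch(object):
    """Hypothetical stand-in tracking which input files were already imported."""

    def __init__(self, coverage_id):
        self.resume_file = coverage_id + ".resume.json"
        self.imported = set()
        if os.path.exists(self.resume_file):
            with open(self.resume_file) as f:
                self.imported = set(json.load(f))

    def is_file_imported(self, file_path):
        return file_path in self.imported

    def get_not_imported_files(self, files):
        return [f for f in files if f not in self.imported]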
Example #2
    def __init__(self, session):
        """
        The recipe class for regular timeseries. To get an overview of the ingredients needed for this
        recipe check ingredients/time_series_regular
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()
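
The options lookup above guards against a missing "options" key; dict.get expresses the same default more compactly. A small, self-contained equivalent:

def get_options(recipe):
    # recipe is the dict returned by session.get_recipe()
    return recipe.get("options", {})

assert get_options({"name": "map_mosaic"}) == {}
assert get_options({"options": {"tiling": None}}) == {"tiling": None}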
Example #3
    def __init__(self, session):
        """
        The recipe class for map_mosaic. To get an overview of the ingredients needed for this
        recipe check ingredients/map_mosaic
        :param Session session: the session for this import
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()
Example #4
    def __init__(self, session):
        """
        The recipe class for map_mosaic. To get an overview of the ingredients needed for this
        recipe check ingredients/map_mosaic
        :param Session session: the session for this import
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        self.recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        if "coverage" in self.options:
            self.options['coverage']['slicer'] = {}
            self.options['coverage']['slicer']['type'] = GdalToCoverageConverter.RECIPE_TYPE
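
The two assignments above replace any slicer the ingredients supplied with one whose type matches the recipe. Assuming GdalToCoverageConverter.RECIPE_TYPE is the string "gdal" (an assumption; this page only shows the constant's name), the resulting structure is:

options = {"coverage": {}}
options['coverage']['slicer'] = {}
options['coverage']['slicer']['type'] = "gdal"  # assumed value of RECIPE_TYPE
print(options)  # {'coverage': {'slicer': {'type': 'gdal'}}}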
Example #5
 def __init__(self, session):
     """
     :param Session session: the session for this import
     """
     super(Recipe, self).__init__(session)
     self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
     self.importer = None
     self.resumer = Resumer(self.session.get_coverage_id())
Example #6
 def __init__(self, session):
     """
     The recipe class for my_custom_recipe (check wcst_import guide from rasdaman web page for more details).
     :param Session session: the session for this import run
     """
     super(Recipe, self).__init__(session)
     self.options = session.get_recipe()['options']
     self.importer = None
     self.resumer = Resumer(self.session.get_coverage_id())
Example #7
 def __init__(self,
              coverage,
              insert_into_wms=False,
              scale_levels=None,
              grid_coverage=False):
     """
     Imports a coverage into wcst
     :param Coverage coverage: the coverage to be imported
     """
     self.coverage = coverage
     self.resumer = Resumer(coverage.coverage_id)
     self.coverage.slices = SliceRestricter(
         self.resumer.eliminate_already_imported_slices(
             self.coverage.slices)).get_slices()
     self.processed = 0
     self.total = len(coverage.slices)
     self.insert_into_wms = insert_into_wms
     self.scale_levels = scale_levels
     self.grid_coverage = grid_coverage
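
The constructor filters coverage.slices twice: it first drops slices already recorded by the resumer, then applies any configured slice restriction. The real SliceRestricter API is not shown on this page, so the following is only the generic shape of that chaining, with plain lists:

def filter_slices(slices, already_imported, restriction=None):
    # Drop already-imported slices, then apply an optional restriction.
    remaining = [s for s in slices if s not in already_imported]
    if restriction is not None:
        remaining = remaining[restriction]  # e.g. slice(0, 2)
    return remaining

print(filter_slices([1, 2, 3, 4], {2}, slice(0, 2)))  # [1, 3]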
Example #8
    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}

        band_data_type = self.DEFAULT_BAND_DATA_TYPE
        if self.product == self.SLC_PRODUCT:
            band_data_type = self.SLC_BAND_DATA_TYPE

        for file in self.session.get_files():

            # Check if this file still exists when preparing to import
            if not FileUtil.validate_file_path(file.get_filepath()):
                continue

            # Check if this file belongs to this coverage id
            modebeam, polarisation = self._get_modebeam_polarisation(
                file.filepath)
            cov_id = self._get_coverage_id(self.coverage_id, modebeam,
                                           polarisation)

            # Skip this file if it was already imported (tracked in coverage_id.resume.json)
            self.resumer = Resumer(cov_id)
            if self.resumer.is_file_imported(file.filepath):
                continue

            conv = self._get_convertor(convertors, cov_id)

            file_pair = FilePair(file.filepath, file.filepath)

            conv.files = [file_pair]
            crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)

            # Different files carry different datetimes in their names
            evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                GdalToCoverageConverter.RECIPE_TYPE, file)

            conv.data_type = band_data_type
            slices = conv._create_coverage_slices(crs_axes, evaluator_slice)
            conv.coverage_slices += slices

        return convertors
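
_get_convertors routes each input file to a per-coverage converter, creating the converter on first use (via _get_convertor) and accumulating slices on it. Stripped of the GDAL details, the grouping pattern looks like this:

def group_files(files, key_for):
    # Route each file to a per-key accumulator, creating it on first use.
    groups = {}
    for path in files:
        groups.setdefault(key_for(path), []).append(path)
    return groups

names = ["s1b-iw-grd-vh-a.tiff", "s1b-ew-grd-hh-b.tiff", "s1a-iw-grd-vh-c.tiff"]
print(group_files(names, key_for=lambda p: p.split("-")[1]))
# {'iw': ['s1b-iw-grd-vh-a.tiff', 's1a-iw-grd-vh-c.tiff'], 'ew': ['s1b-ew-grd-hh-b.tiff']}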
Example #9
 def __init__(self, session):
     """
     The recipe class for wcs. To get an overview of the ingredients needed for this
     recipe check ingredients/map_mosaic
     :param Session session: the session for this import
     """
     super(Recipe, self).__init__(session)
     self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
     self.importer = None
     self.resumer = Resumer(self.session.get_coverage_id())
     self.coverage = None
Example #10
 def __init__(self, coverage, insert_into_wms=False):
     """
     Imports a coverage into wcst
     :param Coverage coverage: the coverage to be imported
     """
     self.coverage = coverage
     self.resumer = Resumer(coverage.coverage_id)
     self.coverage.slices = SliceRestricter(
         self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
     self.processed = 0
     self.total = len(coverage.slices)
     self.insert_into_wms = insert_into_wms
Example #11
    def __init__(self, session):
        """
        :param Session session: the session for this import
        """
        super(Recipe, self).__init__(session)
        self.session = session
        self.options = session.get_recipe()['options']
        self.validate()

        self.coverage_id = self.session.get_input()["coverage_id"]
        self.resumer = Resumer(self.coverage_id)
        self.source_coverage_ids = self.parse_source_coverage_ids(
            self.session.get_input()["source_coverage_ids"])
        self.envelope = self.options["envelope"]
        self.srs_name = XMLUtil.escape(self.envelope["srsName"])
        # array of axis
        self.axes = self.envelope["axis"]
        self.import_wms = self.options["wms_import"] if "wms_import" in self.options else None
Example #12
    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}
        for f in self.session.get_files():
            # This one does not contain any information for geo bounds
            if not FileUtil.validate_file_path(f.get_filepath()):
                continue

            gdal_ds = GDALGmlUtil(f.get_filepath())
            subdatasets = self._get_subdatasets(gdal_ds, f)
            gdal_ds.close()

            level = self._get_level(f.get_filepath())
            if len(self.levels) > 0 and level not in self.levels:
                # skip file, as it's not in the list of levels provided in the ingredients file
                log.debug("Skipping " + level + " data")
                continue
            crs_code = ""

            evaluator_slice = None

            for res in self.resolutions:
                subds_file = self._get_subdataset_file(subdatasets, res)
                crs_code = self._get_crs_code(subds_file.get_filepath(),
                                              crs_code)
                if len(self.crss) > 0 and crs_code not in self.crss:
                    # skip CRS, it's not in the list of CRSs provided in the ingredients file
                    log.debug("Skipping data with CRS " + crs_code)
                    continue
                cov_id = self._get_coverage_id(self.coverage_id, crs_code,
                                               level, res)

                # Skip this file if it was already imported (tracked in coverage_id.resume.json)
                self.resumer = Resumer(cov_id)
                if self.resumer.is_file_imported(f.filepath):
                    continue

                conv = self._get_convertor(convertors, cov_id, crs_code, level,
                                           res)

                file_pair = FilePair(subds_file.filepath, f.filepath)

                conv.files = [file_pair]
                crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)

                if evaluator_slice is None:
                    # This one contains information for geo bounds
                    evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                        GdalToCoverageConverter.RECIPE_TYPE, subds_file)

                # Resolution 10m, 20m and 60m have same data type (UInt16) while TCI has data type (Byte)
                if res == self.RES_TCI:
                    conv.data_type = "Byte"
                else:
                    conv.data_type = "UInt16"

                # Fixed values for 3 axes of Sentinel 2 coverage
                axis_resolutions = self.RES_DICT[res]
                slices = conv._create_coverage_slices(crs_axes,
                                                      evaluator_slice,
                                                      axis_resolutions)
                conv.coverage_slices += slices

        return convertors
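
The data-type branch near the end of the loop reduces to a one-liner; a standalone version (resolution names taken from the constants in Example #17 below):

def band_data_type(res):
    # TCI is an 8-bit true-color image; the other subdatasets are UInt16.
    return "Byte" if res == "TCI" else "UInt16"

print(band_data_type("TCI"), band_data_type("10m"))  # Byte UInt16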
Example #13
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
            The recipe class for irregular timeseries. To get an overview of the ingredients needed for this
            recipe check ingredients/time_series_irregular
            """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        self.recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        if "coverage" in self.options:
            self.options['coverage']['slicer'] = {}
            self.options['coverage']['slicer']['type'] = GdalToCoverageConverter.RECIPE_TYPE

    def validate(self):
        super(Recipe, self).validate()

        if "time_crs" not in self.options or self.options['time_crs'] == "":
            raise RecipeValidationException("No valid time crs provided")

        if 'time_parameter' not in self.options:
            raise RecipeValidationException("No valid time parameter provided")

        if 'metadata_tag' not in self.options['time_parameter'] and \
                'filename' not in self.options['time_parameter']:
            raise RecipeValidationException(
                "You have to provide either a metadata_tag or a filename pattern for the time parameter"
            )

        if 'datetime_format' not in self.options['time_parameter']:
            raise RecipeValidationException(
                "No valid datetime_format provided")

        if 'metadata_tag' in self.options['time_parameter'] and \
                        "tag_name" not in self.options['time_parameter']['metadata_tag']:
            raise RecipeValidationException(
                "No metadata tag to extract time from gdal was provided")

        if 'filename' in self.options['time_parameter'] \
                and self.options['time_parameter']['filename']['regex'] == "" \
                and self.options['time_parameter']['filename']['group'] == "":
            raise RecipeValidationException(
                "No filename regex and group to extract time from gdal was provided"
            )

        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()

        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info(
            "All files have been analyzed. Please verify that the axis subsets of the first {} files above are correct."
            .format(number_of_files))
        index = 1
        for slice in slices:
            log.info("Slice " + str(index) + ": " + str(slice))
            index += 1

    def ingest(self):
        """
        Ingests the input files
        """
        importer = self._get_importer()
        importer.ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype: (int, int)
        """
        return self._get_importer().get_progress()

    def _generate_timeseries_tuples(self, limit=None):
        """
        Generates the timeseries tuples from the original files based on the recipe,
        and sorts the files in order of time.
        :rtype: list[TimeFileTuple]
        """
        ret = []
        if limit is None:
            limit = len(self.session.get_files())

        time_format = None
        if 'datetime_format' in self.options['time_parameter']:
            time_format = self.options['time_parameter']['datetime_format']

        if 'metadata_tag' in self.options['time_parameter']:
            mtag = self.options['time_parameter']['metadata_tag']['tag_name']
            for tfile in self.session.get_files():
                if len(ret) == limit:
                    break

                valid_file = True

                try:
                    gdal_file = GDALGmlUtil(tfile.get_filepath())
                except Exception as ex:
                    FileUtil.ignore_coverage_slice_from_file_if_possible(
                        tfile.get_filepath(), ex)
                    valid_file = False

                if valid_file:
                    dtutil = DateTimeUtil(gdal_file.get_datetime(mtag),
                                          time_format,
                                          self.options['time_crs'])
                    ret.append(TimeFileTuple(dtutil, tfile))
        elif 'filename' in self.options['time_parameter'] and len(ret) < limit:
            regex = self.options['time_parameter']['filename']['regex']
            group = int(self.options['time_parameter']['filename']['group'])
            for tfile in self.session.get_files():
                if len(ret) == limit:
                    break
                dtutil = DateTimeUtil(
                    re.search(regex, tfile.filepath).group(group), time_format,
                    self.options['time_crs'])
                ret.append(TimeFileTuple(dtutil, tfile))
        else:
            raise RecipeValidationException(
                "No method to get the time parameter, you should either choose "
                "metadata_tag or filename.")

        # Sort by datetime to import coverage slices (ascending by default; descending if requested)
        if self.options["import_order"] == AbstractToCoverageConverter.IMPORT_ORDER_DESCENDING:
            return sorted(ret, reverse=True)

        return sorted(ret)

    def _get_coverage_slices(self, crs, gdal_coverage_converter):
        """
        Returns the slices for the collection of files given
        """
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)

        slices = []
        timeseries = self._generate_timeseries_tuples()
        count = 1
        for tpair in timeseries:
            file_path = tpair.file.get_filepath()

            # NOTE: don't process any file already imported in *.resume.json, as that is just wasted time
            if not self.resumer.is_file_imported(file_path):
                timer = Timer()

                # print which file is being analyzed
                FileUtil.print_feedback(count, len(timeseries), file_path)

                if not FileUtil.validate_file_path(file_path):
                    continue

                valid_coverage_slice = True
                try:
                    subsets = GdalAxisFiller(crs_axes,
                                             GDALGmlUtil(file_path)).fill(True)
                    subsets = self._fill_time_axis(tpair, subsets)
                except Exception as ex:
                    # If skip: true then just ignore this file from importing, else raise exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(
                        file_path, ex)
                    valid_coverage_slice = False

                if valid_coverage_slice:
                    # Generate local metadata string for current coverage slice
                    self.evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                        self.recipe_type, tpair.file)
                    local_metadata = gdal_coverage_converter._generate_local_metadata(
                        subsets, self.evaluator_slice)
                    slices.append(
                        Slice(subsets, FileDataProvider(tpair.file),
                              local_metadata))

                timer.print_elapsed_time()
                count += 1

        return slices

    def _fill_time_axis(self, tpair, subsets):
        """
        Fills the time axis parameters
        :param TimeFileTuple tpair: the input pair
        :param list[AxisSubset] subsets: the axis subsets for the tpair
        """
        for i in range(0, len(subsets)):
            if subsets[i].coverage_axis.axis.crs_axis is not None and \
                    subsets[i].coverage_axis.axis.crs_axis.is_time_axis():
                subsets[i].coverage_axis.axis = IrregularAxis(
                    subsets[i].coverage_axis.axis.label,
                    subsets[i].coverage_axis.axis.uomLabel,
                    tpair.time.to_string(), tpair.time.to_string(),
                    tpair.time.to_string(), [0],
                    subsets[i].coverage_axis.axis.crs_axis)
                subsets[i].coverage_axis.grid_axis.resolution = 1
                subsets[i].interval.low = tpair.time
        return subsets

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(
            self.session.get_files())
        crs = CRSUtil.get_compound_crs(
            [self.options['time_crs'],
             gdal_dataset.get_crs()])

        general_recipe = GeneralRecipe(self.session)
        global_metadata_fields = general_recipe._global_metadata_fields()
        local_metadata_fields = general_recipe._local_metadata_fields()

        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())

        gdal_coverage_converter = GdalToCoverageConverter(
            self.resumer, self.session.get_default_null_values(),
            self.recipe_type,
            sentence_evaluator, self.session.get_coverage_id(), None,
            self.session.get_files(), crs, None, None,
            global_metadata_fields, local_metadata_fields, None, None,
            general_recipe._metadata_type(), None, None)

        coverage_slices = self._get_coverage_slices(crs,
                                                    gdal_coverage_converter)
        fields = GdalRangeFieldsGenerator(
            gdal_dataset, self.options['band_names']).get_range_fields()

        global_metadata = None
        if len(coverage_slices) > 0:
            global_metadata = gdal_coverage_converter._generate_global_metadata(
                coverage_slices[0], self.evaluator_slice)

        coverage = Coverage(self.session.get_coverage_id(), coverage_slices,
                            fields, crs, gdal_dataset.get_band_gdal_type(),
                            self.options['tiling'], global_metadata)

        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(),
                                     self.options['wms_import'],
                                     self.options['scale_levels'])
        return self.importer

    @staticmethod
    def get_name():
        return "time_series_irregular"
Example #14
class Importer:
    def __init__(self, coverage, insert_into_wms=False):
        """
        Imports a coverage into wcst
        :param Coverage coverage: the coverage to be imported
        """
        self.coverage = coverage
        self.resumer = Resumer(coverage.coverage_id)
        self.coverage.slices = SliceRestricter(
            self.resumer.eliminate_already_imported_slices(self.coverage.slices)).get_slices()
        self.processed = 0
        self.total = len(coverage.slices)
        self.insert_into_wms = insert_into_wms

    def ingest(self):
        """
        Ingests the given coverage
        """
        if len(self.coverage.slices) > 0:
            if self._is_insert():
                self._initialize_coverage()

            # Insert the remaining slices
            self._insert_slices()

            if self.insert_into_wms:
                self._insert_into_wms()

    def get_progress(self):
        """
        Returns the progress of the import
        :rtype: tuple
        """
        if self.total == 0:
            log.warn("No slices to import.")
            return -1, -1
        return self.processed, self.total

    def _insert_slice(self, current):
        """
        Inserts one slice
        :param Slice current: the slice to be imported
        """
        current_exception = None
        current_str = ""
        for attempt in range(0, ConfigManager.retries):
            try:
                current_str = str(current)
                file = self._generate_gml_slice(current)
                subsets = self._get_update_subsets_for_slice(current)
                request = WCSTUpdateRequest(self.coverage.coverage_id, file.get_url(), subsets, ConfigManager.insitu)
                executor = ConfigManager.executor
                executor.execute(request)
                file.release()
                self.resumer.add_imported_data(current.data_provider)
            except Exception as e:
                log.warn("\nException thrown when trying to insert slice:\n" +
                         current_str + "\nRetrying, you can safely ignore the warning for now. Tried " +
                         str(attempt + 1) + " times.\n")
                current_exception = e
                sleep(ConfigManager.retry_sleep)
            else:
                break
        else:
            log.warn("\nFailed to insert slice. Attempted " + str(ConfigManager.retries) + " times.")
            raise current_exception

    def get_slices_for_description(self):
        """
        Returns a list with the first slices to be used in the import description
        :rtype: list[Slice]
        """
        slices = []
        max_slices = min(ConfigManager.description_max_no_slices,
                         len(self.coverage.slices))
        for i in range(0, max_slices):
            slices.append(self.coverage.slices[i])
        return slices

    def _insert_slices(self):
        """
        Insert the slices of the coverage
        """
        for i in range(self.processed, self.total):
            try:
                self._insert_slice(self.coverage.slices[i])
            except Exception as e:
                if ConfigManager.skip:
                    log.warn("Skipped slice " + str(self.coverage.slices[i]))
                else:
                    raise e
            self.processed += 1

    def _initialize_coverage(self):
        """
        Initializes the coverage
        """
        file = self._generate_initial_gml_slice()
        request = WCSTInsertRequest(file.get_url(), False, self.coverage.pixel_data_type,
                                    self.coverage.tiling)
        executor = ConfigManager.executor
        current_insitu_value = executor.insitu
        executor.insitu = None
        executor.execute(request)
        executor.insitu = current_insitu_value
        file.release()

    def _get_update_subsets_for_slice(self, slice):
        """
        Returns the given slice's interval as a list of wcst subsets
        :param slice: the slice for which to generate this
        :rtype: list[WCSTSubset]
        """
        subsets = []
        for axis_subset in slice.axis_subsets:
            low = axis_subset.interval.low
            high = axis_subset.interval.high
            if ConfigManager.subset_correction and high is not None and low != high and type(low) != str:
                low += float(axis_subset.coverage_axis.grid_axis.resolution) / 2
                if high is not None:
                    high -= float(axis_subset.coverage_axis.grid_axis.resolution) / 2
            subsets.append(WCSTSubset(axis_subset.coverage_axis.axis.label, low, high))
        return subsets

    def _generate_gml_slice(self, slice):
        """
        Generates the gml for a regular slice
        :param slice: the slice for which the gml should be created
        :rtype: File
        """
        metadata_provider = MetadataProvider(self.coverage.coverage_id, self._get_update_axes(slice),
                                             self.coverage.range_fields, self.coverage.crs, None)
        data_provider = slice.data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _get_update_axes(self, slice):
        """
        Returns the axes for the slices that are bound to the data (e.g. Lat and Long for a 2-D raster)
        :param slice: the slice for which the gml should be created
        :rtype: dict[Axis, GridAxis]
        """
        axes = OrderedDict()
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                axes[axis_subset.coverage_axis.axis] = axis_subset.coverage_axis.grid_axis
        return axes

    def _generate_initial_gml_slice(self):
        """
        Returns the initial slice in gml format
        :rtype: File
        """
        return self._generate_initial_gml_db()

    def _generate_initial_gml_db(self):
        """
        Generates the initial slice in gml for importing using the database method and returns the gml for it
        :rtype: File
        """
        # Transform the axes domains such that only a point is defined.
        # For the first slice we need to import a single point, which will then be updated with the real data
        axes_map = OrderedDict()
        for axis, grid_axis in self.coverage.get_insert_axes().iteritems():
            if axis.coefficient is not None:
                # Get the first coefficient of the irregular coverage to create an initial slice
                axis.coefficient = [axis.coefficient[0]]
            axes_map[axis] = GridAxis(grid_axis.order, grid_axis.label, grid_axis.resolution, 0, 0)
        metadata_provider = MetadataProvider(self.coverage.coverage_id, axes_map,
                                             self.coverage.range_fields, self.coverage.crs, self.coverage.metadata)
        tuple_list = ",".join(['0'] * len(self.coverage.range_fields))
        data_provider = TupleListDataProvider(tuple_list)
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _generate_initial_gml_inistu(self):
        """
        Generates the initial slice in gml for importing using the insitu method and returns the gml file for it
        :rtype: File
        """
        metadata_provider = MetadataProvider(self.coverage.coverage_id, self.coverage.get_insert_axes(),
                                             self.coverage.range_fields, self.coverage.crs, self.coverage.metadata)
        data_provider = self.coverage.slices[0].data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        self.processed += 1
        self.resumer.add_imported_data(data_provider)
        return file

    def _insert_into_wms(self):
        """
        Inserts the coverage into the wms service
        """
        try:
            request = WMSTFromWCSInsertRequest(self.coverage.coverage_id, False)
            ConfigManager.executor.execute(request)
        except Exception as e:
            log.error(
                "Exception thrown when importing in WMS. Please try to reimport in WMS manually.")
            raise e

    def _is_insert(self):
        """
        Returns true if the coverage should be inserted, false if only updates are needed
        :rtype: bool
        """
        cov = CoverageUtil(self.coverage.coverage_id)
        return not cov.exists()
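
_insert_slice above relies on Python's for/else: the else branch of a for-loop runs only when the loop finished without a break, so it fires exactly when every attempt failed. A minimal sketch of the same control flow:

from time import sleep

def with_retries(action, retries=3, retry_sleep=0.0):
    last_exception = None
    for attempt in range(retries):
        try:
            result = action()
        except Exception as e:
            last_exception = e
            sleep(retry_sleep)
        else:
            break  # success: skip the for-loop's else clause
    else:
        raise last_exception  # every attempt failed
    return result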
Example #15
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
        The recipe class for map_mosaic. To get an overview of the ingredients needed for this
        recipe check ingredients/map_mosaic
        :param Session session: the session for this import
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()

    def validate(self):
        """
        Implementation of the base recipe validate method
        """
        super(Recipe, self).validate()

        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()

        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info("All files have been analyzed. Please verify that the axis subsets of the first {} files above are correct.".format(number_of_files))
        index = 1
        for slice in slices:
            log.info("Slice " + str(index) + ": " + str(slice))
            index += 1

    def ingest(self):
        """
        Starts the ingesting process
        """
        importer = self._get_importer()
        importer.ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype: (int, int)
        """
        return self._get_importer().get_progress()

    def _get_slices(self, crs):
        """
        Returns the slices for the collection of files given
        """
        files = self.session.get_files()
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)

        slices = []
        count = 1
        for file in files:
            # NOTE: don't process any file already imported in *.resume.json, as that is just wasted time
            if not self.resumer.is_file_imported(file.filepath):
                timer = Timer()

                # print which file is being analyzed
                FileUtil.print_feedback(count, len(files), file.filepath)
                if not FileUtil.validate_file_path(file.filepath):
                    continue

                valid_coverage_slice = True
                try:
                    subsets = GdalAxisFiller(crs_axes, GDALGmlUtil(file.get_filepath())).fill()
                except Exception as ex:
                    # If skip: true then just ignore this file from importing, else raise exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(file.get_filepath(), ex)
                    valid_coverage_slice = False

                if valid_coverage_slice:
                    slices.append(Slice(subsets, FileDataProvider(file)))

                timer.print_elapsed_time()
                count += 1

        return slices

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(self.session.get_files())
        crs = gdal_dataset.get_crs()
        slices = self._get_slices(crs)
        fields = GdalRangeFieldsGenerator(gdal_dataset, self.options['band_names']).get_range_fields()
        coverage = Coverage(self.session.get_coverage_id(), slices, fields, gdal_dataset.get_crs(),
            gdal_dataset.get_band_gdal_type(), self.options['tiling'])
        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(), self.options['wms_import'], self.options['scale_levels'], False)
        return self.importer


    @staticmethod
    def get_name():
        return "map_mosaic"
Example #16
class Recipe(GeneralCoverageRecipe):
    #
    # constants
    #

    RECIPE_NAME = "sentinel1"

    # coverage Id scheme: S1_GRD_${modebeam}_${polarisation}
    # e.g:                S1_GRD_IW/EW      _HH,VV,VH,..

    # Sentinel 1 tiff pattern
    # e.g: s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff
    GRD_FILE_PATTERN = "(.*)-(.*)-grd-(.*)-(.*)-(.*)-(.*)-(.*)-(.*).tiff"
    grd_pattern = re.compile(GRD_FILE_PATTERN)

    # variables that can be used to template the coverage id
    VAR_MODEBEAM = '${modebeam}'
    VAR_POLARISATION = '${polarisation}'

    # 1 tiff file contains 1 band
    BAND = UserBand("1", "Grey", "", "", "", [0], "")

    DEFAULT_MODEBEAMS = ["EW", "IW"]
    DEFAULT_POLARISATIONS = ["HH", "HV", "VH", "VV"]

    # Sentinel 1 contains 1 band
    DEFAULT_BAND_DATA_TYPE = "UInt16"

    EPSG_XY_CRS = "$EPSG_XY_CRS"
    CRS_TEMPLATE = "OGC/0/AnsiDate@" + EPSG_XY_CRS

    DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING

    DEFAULT_NULL_VALUE = 0

    #
    # public
    #

    def __init__(self, session):
        super(Recipe, self).__init__(session)
        self._init_options()

    def validate(self):
        super(Recipe, self).validate()

        valid_files = []
        # Local validate for input files
        for file in self.session.get_files():
            file_name = os.path.basename(file.get_filepath())
            if not bool(re.match(self.GRD_FILE_PATTERN, file_name)):
                log.warn(
                    "File '" + file.get_filepath() +
                    "' is not valid GRD TIFF file, ignored for further processing."
                )
            else:
                valid_files.append(file)

        self.session.files = valid_files

    def describe(self):
        log.info("The recipe has been validated and is ready to run.")
        log.info(make_bold("Recipe: ") + self.session.get_recipe()['name'])
        log.info(make_bold("WCS Service: ") + ConfigManager.wcs_service)
        log.info(make_bold("Mocked: ") + str(ConfigManager.mock))
        if ConfigManager.track_files:
            log.info(
                make_bold("Track files: ") + str(ConfigManager.track_files))
        if ConfigManager.skip:
            log.info(make_bold("Skip: ") + str(ConfigManager.skip))
        if ConfigManager.retry:
            log.info(make_bold("Retries: ") + str(ConfigManager.retries))
        if ConfigManager.slice_restriction is not None:
            log.info(
                make_bold("Slice Restriction: ") +
                str(ConfigManager.slice_restriction))

        multiimporter = self._get_importer()
        cov_num = len(multiimporter.importers)
        i = 1
        for importer in multiimporter.importers:
            log.info("Coverage {}/{} - {}: {} files.".format(
                i, cov_num, make_bold(importer.coverage.coverage_id),
                len(importer.coverage.slices)))
            i += 1

    def ingest(self):
        self._get_importer().ingest()

    def status(self):
        return self._get_importer().get_progress()

    #
    # private
    #

    def _init_options(self):
        self._init_coverage_options()
        self._init_input_options()
        self.coverage_id = self.session.get_coverage_id()
        self.import_order = self._set_option(self.options, "import_order",
                                             self.DEFAULT_IMPORT_ORDER)
        self.wms_import = self._set_option(self.options, "wms_import", False)
        self.scale_levels = self._set_option(self.options, "scale_levels", [])
        self.grid_cov = False

    def _init_coverage_options(self):
        covopts = self.options["coverage"]

        self._init_epsg_xy_crs()
        compound_crs = self.CRS_TEMPLATE.replace(self.EPSG_XY_CRS,
                                                 self.epsg_xy_crs)
        self.crs = self._set_option(covopts, "crs",
                                    self._resolve_crs(compound_crs))
        self._set_option(covopts, "slicer", {})
        self._init_slicer_options(covopts)

    def _init_input_options(self):
        # specify a subset of resolutions to ingest
        inputopts = self.session.get_input()
        self.modebeams = self._set_option(inputopts, "modebeams",
                                          self.DEFAULT_MODEBEAMS)
        self.polarisations = self._set_option(inputopts, "polarisations",
                                              self.DEFAULT_POLARISATIONS)

    def _init_slicer_options(self, covopts):
        sliceropts = covopts["slicer"]
        self._set_option(sliceropts, "type", "gdal")
        self._set_option(sliceropts, "pixelIsPoint", False)
        axesopts = self._init_axes_options()

        if "axes" in sliceropts:
            for axis in sliceropts["axes"]:
                for i in sliceropts["axes"][axis]:
                    axesopts[axis][i] = sliceropts["axes"][axis][i]
        sliceropts["axes"] = axesopts

    def _init_axes_options(self):
        epsg_xy_axes_labels = self.__get_epsg_xy_axes_labels()

        return {
            "ansi": {
                # e.g. s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff
                "min": "datetime(regex_extract('${file:name}', '" +
                       self.GRD_FILE_PATTERN + "', 4), 'YYYYMMDD')",
                "gridOrder": 0,
                "type": "ansidate",
                "irregular": True,
                "dataBound": False
            },
            epsg_xy_axes_labels[0]: {
                "min": "${gdal:minX}",
                "max": "${gdal:maxX}",
                "gridOrder": 1,
                "resolution": "${gdal:resolutionX}"
            },
            epsg_xy_axes_labels[1]: {
                "min": "${gdal:minY}",
                "max": "${gdal:maxY}",
                "gridOrder": 2,
                "resolution": "${gdal:resolutionY}"
            }
        }

    def _init_epsg_xy_crs(self):
        """
        From the first input file, detect the EPSG code for its XY axes
        """
        gdal_ds = GDALGmlUtil(self.session.get_files()[0].get_filepath())
        self.epsg_xy_crs = gdal_ds.get_crs()

    def __get_epsg_xy_axes_labels(self):
        """
        Return a tuple of axis labels for X and Y axes
        """
        axes_labels = CRSUtil.get_axis_labels_from_single_crs(self.epsg_xy_crs)
        axis_type1 = CRSAxis.get_axis_type_by_name(axes_labels[0])

        # XY order (e.g: EPSG:3857)
        if axis_type1 == CRSAxis.AXIS_TYPE_X:
            return axes_labels[0], axes_labels[1]
        else:
            # YX order (e.g: EPSG:4326) needs to swap order
            return axes_labels[1], axes_labels[0]

    def _set_option(self, opts, key, default_value):
        if key not in opts:
            opts[key] = default_value
        return opts[key]

    def _get_importer(self):
        if self.importer is None:
            self.importer = MultiImporter(self._get_importers())
        return self.importer

    def _get_importers(self):
        ret = []
        convertors = self._get_convertors()
        for cov_id, conv in convertors.iteritems():
            coverage_slices = conv.coverage_slices

            importer = Importer(conv.resumer,
                                conv.to_coverage(coverage_slices),
                                self.wms_import, self.scale_levels,
                                self.grid_cov)
            ret.append(importer)
        return ret

    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}

        for file in self.session.get_files():

            # Check if this file still exists when preparing to import
            if not FileUtil.validate_file_path(file.get_filepath()):
                continue

            # Check if this file belongs to this coverage id
            modebeam, polarisation = self._get_modebeam_polarisation(
                file.filepath)
            cov_id = self._get_coverage_id(self.coverage_id, modebeam,
                                           polarisation)

            # Skip this file if it was already imported (tracked in coverage_id.resume.json)
            self.resumer = Resumer(cov_id)
            if self.resumer.is_file_imported(file.filepath):
                continue

            conv = self._get_convertor(convertors, cov_id)

            file_pair = FilePair(file.filepath, file.filepath)

            conv.files = [file_pair]
            crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)

            # Different files carry different datetimes in their names
            evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                GdalToCoverageConverter.RECIPE_TYPE, file)

            conv.data_type = self.DEFAULT_BAND_DATA_TYPE
            slices = conv._create_coverage_slices(crs_axes, evaluator_slice)
            conv.coverage_slices += slices

        return convertors

    def _get_modebeam_polarisation(self, file_path):
        """
        If this file's name matches a combination of resolution (e.g: H), modebeam (e.g: EW), polarisation (e.g: HH),
        then it is valid to import into this coverage, e.g. S1_GRDH_EW_HH
        """
        # e.g: s1a-iw-grd-vh-20190326t171654-20190326t171719-026512-02f856-002.tiff
        file_name = os.path.basename(file_path)
        matcher = self.grd_pattern.match(file_name)

        tmp_modebeam = matcher.group(2)
        tmp_polarisation = matcher.group(3)

        return tmp_modebeam.upper(), tmp_polarisation.upper()

    def _get_coverage_id(self, cov_id, modebeam, polarisation):
        return cov_id.replace(self.VAR_MODEBEAM, modebeam) \
                     .replace(self.VAR_POLARISATION, polarisation)

    def _get_convertor(self, convertors, cov_id):
        if cov_id not in convertors:
            convertors[cov_id] = self._create_convertor(cov_id)
        return convertors[cov_id]

    def _create_convertor(self, cov_id):
        recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())
        files = []
        bands_metadata_fields = {}
        axis_metadata_fields = {}

        default_null_values = [self.DEFAULT_NULL_VALUE]

        return GdalToCoverageConverter(
            self.resumer, default_null_values, recipe_type,
            sentence_evaluator, cov_id, [self.BAND], files, self.crs,
            self._read_axes(self.crs), self.options['tiling'],
            self._global_metadata_fields(), self._local_metadata_fields(),
            bands_metadata_fields, axis_metadata_fields, self._metadata_type(),
            self.grid_cov, self.import_order)

    @staticmethod
    def get_name():
        return Recipe.RECIPE_NAME
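
_get_modebeam_polarisation depends on groups 2 and 3 of GRD_FILE_PATTERN. Checking the pattern against the sample file name from the comments above:

import re

GRD_FILE_PATTERN = r"(.*)-(.*)-grd-(.*)-(.*)-(.*)-(.*)-(.*)-(.*).tiff"
name = "s1b-iw-grd-vh-20190324t164346-20190324t164411-015499-01d0a6-002.tiff"
matcher = re.match(GRD_FILE_PATTERN, name)
print(matcher.group(2).upper(), matcher.group(3).upper())  # IW VH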
Example #17
class Recipe(GeneralCoverageRecipe):

    #
    # constants
    #

    RECIPE_NAME = "sentinel2"

    # supported product levels
    LVL_L1C = 'L1C'
    LVL_L2A = 'L2A'
    LEVELS = [LVL_L1C, LVL_L2A]

    # resolutions in a single Sentinel 2 dataset; TCI (True Color Image) is 10m
    RES_10m = '10m'
    RES_20m = '20m'
    RES_60m = '60m'
    RES_TCI = 'TCI'
    # resolution (subdataset name) -> actual resolution numbers
    RES_DICT = {
        RES_10m: [1, 10, -10],
        RES_20m: [1, 20, -20],
        RES_60m: [1, 60, -60],
        RES_TCI: [1, 10, -10]
    }
    # list of subdatasets to import
    SUBDATASETS = [RES_10m, RES_20m, RES_60m, RES_TCI]

    # variables that can be used to template the coverage id
    VAR_CRS_CODE = '${crsCode}'
    VAR_RESOLUTION = '${resolution}'
    VAR_LEVEL = '${level}'
    # bands for each resolution
    BANDS_L1C = {
        RES_10m: [
            UserBand("1", "B4", "red, central wavelength 665 nm", "", "", [0],
                     ""),
            UserBand("2", "B3", "green, central wavelength 560 nm", "", "",
                     [0], ""),
            UserBand("3", "B2", "blue, central wavelength 490 nm", "", "", [0],
                     ""),
            UserBand("4", "B8", "nir, central wavelength 842 nm", "", "", [0],
                     "")
        ],
        RES_20m: [
            UserBand("1", "B5", "central wavelength 705 nm", "", "", [0], ""),
            UserBand("2", "B6", "central wavelength 740 nm", "", "", [0], ""),
            UserBand("3", "B7", "central wavelength 783 nm", "", "", [0], ""),
            UserBand("4", "B8A", "central wavelength 865 nm", "", "", [0], ""),
            UserBand("5", "B11", "central wavelength 1610 nm", "", "", [0],
                     ""),
            UserBand("6", "B12", "central wavelength 2190 nm", "", "", [0], "")
        ],
        RES_60m: [
            UserBand("1", "B1", "central wavelength 443 nm", "", "", [0], ""),
            UserBand("2", "B9", "central wavelength 945 nm", "", "", [0], ""),
            UserBand("3", "B10", "central wavelength 1375 nm", "", "", [0], "")
        ],
        RES_TCI: [
            UserBand("1", "red", "B4, central wavelength 665 nm", "", "", [0],
                     ""),
            UserBand("2", "green", "B3, central wavelength 560 nm", "", "",
                     [0], ""),
            UserBand("3", "blue", "B2, central wavelength 490 nm", "", "", [0],
                     "")
        ],
    }
    # L2A is the same as L1C but doesn't have B10 in the 60m subdataset
    BANDS_L2A = {
        RES_10m: BANDS_L1C[RES_10m],
        RES_20m: BANDS_L1C[RES_20m],
        RES_60m: [
            UserBand("1", "B1", "central wavelength 443 nm", "", "", [0], "nm"),
            UserBand("2", "B9", "central wavelength 945 nm", "", "", [0], "nm"),
        ],
        RES_TCI: BANDS_L1C[RES_TCI],
    }
    BANDS = {LVL_L1C: BANDS_L1C, LVL_L2A: BANDS_L2A}
    DEFAULT_CRS = "OGC/0/AnsiDate@EPSG/0/${crsCode}"
    DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING

    DEFAULT_NULL_VALUE = 0

    #
    # public
    #

    def __init__(self, session):
        super(Recipe, self).__init__(session)
        self._init_options()

    def validate(self):
        super(Recipe, self).validate()
        if len(self.resolutions) == 0:
            raise RecipeValidationException(
                "No resolutions to import provided.")
        for res in self.resolutions:
            if res not in self.SUBDATASETS:
                raise RecipeValidationException(
                    "Invalid resolution '" + str(res) +
                    "' provided, expected a subset of " +
                    str(self.SUBDATASETS))
        for lvl in self.levels:
            if lvl not in self.LEVELS:
                raise RecipeValidationException(
                    "Invalid level '" + str(lvl) +
                    "' provided, expected a subset of " + str(self.LEVELS))

    def describe(self):
        log.info("The recipe has been validated and is ready to run.")
        log.info(make_bold("Recipe: ") + self.session.get_recipe()['name'])
        log.info(make_bold("WCS Service: ") + ConfigManager.wcs_service)
        log.info(make_bold("Mocked: ") + str(ConfigManager.mock))
        if ConfigManager.track_files:
            log.info(
                make_bold("Track files: ") + str(ConfigManager.track_files))
        if ConfigManager.skip:
            log.info(make_bold("Skip: ") + str(ConfigManager.skip))
        if ConfigManager.retry:
            log.info(make_bold("Retries: ") + str(ConfigManager.retries))
        if ConfigManager.slice_restriction is not None:
            log.info(
                make_bold("Slice Restriction: ") +
                str(ConfigManager.slice_restriction))

        multiimporter = self._get_importer()
        cov_num = len(multiimporter.importers)
        i = 1
        for importer in multiimporter.importers:
            log.info("Coverage {}/{} - {}: {} files.".format(
                i, cov_num, make_bold(importer.coverage.coverage_id),
                len(importer.coverage.slices)))
            i += 1

    def ingest(self):
        self._get_importer().ingest()

    def status(self):
        return self._get_importer().get_progress()

    #
    # private
    #

    def _init_options(self):
        self._init_coverage_options()
        self._init_input_options()
        self.coverage_id = self.session.get_coverage_id()
        self.import_order = self._set_option(self.options, 'import_order',
                                             self.DEFAULT_IMPORT_ORDER)
        self.wms_import = self._set_option(self.options, 'wms_import', False)
        self.scale_levels = self._set_option(self.options, 'scale_levels', [])
        self.grid_cov = False

    def _init_coverage_options(self):
        covopts = self.options['coverage']
        self.crs = self._set_option(covopts, 'crs', self.DEFAULT_CRS)
        self._set_option(covopts, 'slicer', {})
        self._init_slicer_options(covopts)

    def _init_input_options(self):
        # specify a subset of resolutions to ingest
        inputopts = self.session.get_input()
        self.resolutions = self._set_option(inputopts, 'resolutions', None)
        if self.resolutions is None:
            self.resolutions = self._set_option(inputopts, 'subdatasets', None)
        if self.resolutions is None:
            self.resolutions = self.SUBDATASETS
        # only ingest data with particular CRSs, if specified
        self.crss = self._set_option(inputopts, 'crss', [])
        # only ingest data at the specified product levels
        self.levels = self._set_option(inputopts, 'levels', [])

    def _init_slicer_options(self, covopts):
        sliceropts = covopts['slicer']
        self._set_option(sliceropts, 'type', 'gdal')
        self._set_option(sliceropts, 'pixelIsPoint', False)
        axesopts = self._init_axes_options()
        if 'axes' in sliceropts:
            for axis in sliceropts['axes']:
                if axis not in axesopts:
                    raise RecipeValidationException(
                        "Invalid axis '" + axis +
                        "', expected one of ansi/E/N.")
                for k in sliceropts['axes'][axis]:
                    axesopts[axis][k] = sliceropts['axes'][axis][k]
        sliceropts['axes'] = axesopts

    def _init_axes_options(self):
        return {
            'ansi': {
                "min":
                "datetime(regex_extract('${file:path}', '.*?/S2[^_]+_MSI[^_]+_([\\d]+)T[\\d]+_', 1), 'YYYYMMDD')",
                "gridOrder": 0,
                "type": "ansidate",
                "irregular": True,
                "dataBound": False
            },
            'E': {
                "min": "${gdal:minX}",
                "max": "${gdal:maxX}",
                "gridOrder": 1,
                "resolution": "${gdal:resolutionX}"
            },
            'N': {
                "min": "${gdal:minY}",
                "max": "${gdal:maxY}",
                "gridOrder": 2,
                "resolution": "${gdal:resolutionY}"
            }
        }

    def _set_option(self, opts, key, default_value):
        if key not in opts:
            opts[key] = default_value
        return opts[key]

    def _get_importer(self):
        if self.importer is None:
            self.importer = MultiImporter(self._get_importers())
        return self.importer

    def _get_importers(self):
        ret = []
        convertors = self._get_convertors()
        for cov_id, conv in convertors.iteritems():
            coverage_slices = conv.coverage_slices

            importer = Importer(conv.resumer,
                                conv.to_coverage(coverage_slices),
                                self.wms_import, self.scale_levels,
                                self.grid_cov)
            ret.append(importer)
        return ret

    def _get_convertors(self):
        """
        Returns a map of coverage id -> GdalToCoverageConverter
        """
        convertors = {}
        for f in self.session.get_files():
            # This one does not contain any information for geo bounds
            if not FileUtil.validate_file_path(f.get_filepath()):
                continue

            gdal_ds = GDALGmlUtil(f.get_filepath())
            subdatasets = self._get_subdatasets(gdal_ds, f)
            gdal_ds.close()

            level = self._get_level(f.get_filepath())
            if len(self.levels) > 0 and level not in self.levels:
                # skip file, as it's not in the list of levels provided in the ingredients file
                log.debug("Skipping " + level + " data")
                continue
            crs_code = ""

            evaluator_slice = None

            for res in self.resolutions:
                subds_file = self._get_subdataset_file(subdatasets, res)
                crs_code = self._get_crs_code(subds_file.get_filepath(),
                                              crs_code)
                if len(self.crss) > 0 and crs_code not in self.crss:
                    # skip CRS, it's not in the list of CRSs provided in the ingredients file
                    log.debug("Skipping data with CRS " + crs_code)
                    continue
                cov_id = self._get_coverage_id(self.coverage_id, crs_code,
                                               level, res)

                # Skip this file if it was already imported (tracked in coverage_id.resume.json)
                self.resumer = Resumer(cov_id)
                if self.resumer.is_file_imported(f.filepath):
                    continue

                conv = self._get_convertor(convertors, cov_id, crs_code, level,
                                           res)

                file_pair = FilePair(subds_file.filepath, f.filepath)

                conv.files = [file_pair]
                crs_axes = CRSUtil(conv.crs).get_axes(self.coverage_id)

                if evaluator_slice is None:
                    # This one contains information for geo bounds
                    evaluator_slice = EvaluatorSliceFactory.get_evaluator_slice(
                        GdalToCoverageConverter.RECIPE_TYPE, subds_file)

                # Resolution 10m, 20m and 60m have same data type (UInt16) while TCI has data type (Byte)
                if res == self.RES_TCI:
                    conv.data_type = "Byte"
                else:
                    conv.data_type = "UInt16"

                # Fixed values for 3 axes of Sentinel 2 coverage
                axis_resolutions = self.RES_DICT[res]
                slices = conv._create_coverage_slices(crs_axes,
                                                      evaluator_slice,
                                                      axis_resolutions)
                conv.coverage_slices += slices

        return convertors

    def _get_subdatasets(self, gdal_ds, f):
        subdatasets = gdal_ds.get_subdatasets()
        if len(subdatasets) != len(self.SUBDATASETS):
            raise RuntimeException("Cannot handle Sentinel 2 file " +
                                   f.get_filepath() + ": GDAL reported " +
                                   str(len(subdatasets)) +
                                   " subdatasets, expected " +
                                   str(len(self.SUBDATASETS)) + ".")
        return [name for (name, _) in subdatasets]

    def _get_subdataset_file(self, subdatasets, res):
        check = ":" + res + ":"
        for name in subdatasets:
            if check in name:
                return File(name)
        # else not found
        raise RuntimeException("Resolution (string ':" + res +
                               ":') not found in subdatasets: " +
                               str(subdatasets))

    def _get_crs_code(self, subds, crs_code):
        """
        Return the <crs_code> from subds of the form: 
        SENTINEL2_<level>:<file>:<resolution>:EPSG_<crs_code>
        """
        if crs_code == "":
            parts = subds.split(":EPSG_")
            if len(parts) != 2:
                raise RuntimeException(
                    "Cannot determine EPSG code from subdataset " + subds)
            return parts[1]
        return crs_code

    def _get_level(self, file_path):
        if '_MSIL1C_' in file_path:
            return self.LVL_L1C
        elif '_MSIL2A_' in file_path:
            return self.LVL_L2A
        else:
            log.warn("Cannot determine level from collected file: " +
                     file_path + "; assuming L1C.")
            return self.LVL_L1C

    def _get_coverage_id(self, cov_id, crs_code, level, resolution):
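        # Illustrative, assuming the VAR_* constants are template placeholders
        # such as "${crs_code}", "${level}" and "${resolution}" (assumed values):
        #   _get_coverage_id("S2_${level}_${crs_code}_${resolution}", "32632", "L1C", "10m")
        #   -> "S2_L1C_32632_10m"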
        return cov_id.replace(self.VAR_CRS_CODE, crs_code) \
                     .replace(self.VAR_LEVEL, level) \
                     .replace(self.VAR_RESOLUTION, resolution)

    def _get_convertor(self, convertors, cov_id, crs_code, level, res):
        if cov_id not in convertors:
            convertors[cov_id] = \
                self._create_convertor(convertors, cov_id, crs_code, level, res)
        return convertors[cov_id]

    def _create_convertor(self, convertors, cov_id, crs_code, level, res):
        recipe_type = GdalToCoverageConverter.RECIPE_TYPE
        sentence_evaluator = SentenceEvaluator(ExpressionEvaluatorFactory())
        files = []
        crs = self._get_crs(crs_code)
        bands_metadata_fields = {}
        axis_metadata_fields = {}

        default_null_values = [self.DEFAULT_NULL_VALUE]

        return GdalToCoverageConverter(
            self.resumer, default_null_values, recipe_type,
            sentence_evaluator, cov_id, self.BANDS[level][res], files, crs,
            self._read_axes(crs), self.options['tiling'],
            self._global_metadata_fields(), self._local_metadata_fields(),
            bands_metadata_fields, axis_metadata_fields, self._metadata_type(),
            self.grid_cov, self.import_order)

    def _get_crs(self, crs_code):
        crs = self.crs.replace(self.VAR_CRS_CODE, crs_code)
        return self._resolve_crs(crs)

    @staticmethod
    def get_name():
        return Recipe.RECIPE_NAME
Example #18
class Recipe(BaseRecipe):
    def __init__(self, session):
        """
        The recipe class for regular timeseries. To get an overview of the ingredients needed for this
        recipe check ingredients/time_series_regular
        """
        super(Recipe, self).__init__(session)
        self.options = session.get_recipe()['options'] if "options" in session.get_recipe() else {}
        self.importer = None
        self.resumer = Resumer(self.session.get_coverage_id())

        validator = GDALValidator(self.session.files)
        if ConfigManager.skip:
            self.session.files = validator.get_valid_files()

    def validate(self):
        super(Recipe, self).validate()

        if "time_crs" not in self.options or self.options['time_crs'] == "":
            raise RecipeValidationException("No valid time crs provided")

        if 'time_start' not in self.options:
            raise RecipeValidationException(
                "No valid time start parameter provided")

        if 'time_step' not in self.options:
            raise RecipeValidationException(
                "You have to provide a valid time step indicating both the value and the unit of time"
            )

        if 'band_names' not in self.options:
            self.options['band_names'] = None

    def describe(self):
        """
        Implementation of the base recipe describe method
        """
        importer = self._get_importer()

        slices = importer.get_slices_for_description()
        number_of_files = len(slices)
        log.info(
            "All files have been analyzed. Please verify that the axis subsets of the first {} files above are correct."
            .format(number_of_files))
        for index, slice in enumerate(slices, start=1):
            log.info("Slice " + str(index) + ": " + str(slice))

    def ingest(self):
        """
        Ingests the input files
        """
        self._get_importer().ingest()

    def status(self):
        """
        Implementation of the status method
        :rtype: (int, int)
        """
        return self._get_importer().get_progress()

    def _generate_timeseries_tuples(self, limit=None):
        """
        Generate the timeseries tuples from the original files based on the
        recipe, and sort the files in order of time.
        :rtype: list[TimeFileTuple]
        """
        ret = []
        if limit is None:
            limit = len(self.session.get_files())

        time_offset = 0
        time_format = self.options['time_format'] if self.options['time_format'] != "auto" else None
        time_start = DateTimeUtil(self.options['time_start'], time_format,
                                  self.options['time_crs'])
        for tfile in self.session.get_files():
            if len(ret) == limit:
                break
            time_tuple = TimeFileTuple(
                self._get_datetime_with_step(time_start, time_offset), tfile)
            ret.append(time_tuple)
            time_offset += 1

        # Coverage slices are imported sorted by datetime, ascending by default; optionally sort descending
        if self.options["import_order"] == AbstractToCoverageConverter.IMPORT_ORDER_DESCENDING:
            return sorted(ret, reverse=True)

        return sorted(ret)

    def _get_datetime_with_step(self, current, offset):
        """
        Returns the new datetime
        :param DateTimeUtil current: the date to add the step
        :param int offset: the number of steps to make
        """
        days, hours, minutes, seconds = tuple(
            [offset * item for item in self._get_real_step()])
        return DateTimeUtil(
            current.datetime.replace(days=+days,
                                     hours=+hours,
                                     minutes=+minutes,
                                     seconds=+seconds).isoformat(), None,
            self.options['time_crs'])

    def _get_real_step(self):
        res = re.search(
            r"([0-9]*[\s]*days)?[\s]*"
            r"([0-9]*[\s]*hours)?[\s]*"
            r"([0-9]*[\s]*minutes)?[\s]*"
            r"([0-9]*[\s]*seconds)?[\s]*", self.options['time_step'])
        days_s = res.group(1)
        hours_s = res.group(2)
        minutes_s = res.group(3)
        seconds_s = res.group(4)
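        # Illustrative: for time_step "1 days 2 hours 10 seconds" the groups are
        # "1 days", "2 hours", None, "10 seconds" -> the method returns (1, 2, 0, 10)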

        if days_s is None and hours_s is None and minutes_s is None and seconds_s is None:
            raise RuntimeException(
                'The time step does not have a valid unit of measure. '
                'Example of a valid time step: 1 days 2 hours 10 seconds')

        days = (int(days_s.replace("days", "").strip())
                if days_s is not None else 0)
        hours = (int(hours_s.replace("hours", "").strip())
                 if hours_s is not None else 0)
        minutes = (int(minutes_s.replace("minutes", "").strip())
                   if minutes_s is not None else 0)
        seconds = (int(seconds_s.replace("seconds", "").strip())
                   if seconds_s is not None else 0)
        return days, hours, minutes, seconds

    def _get_slices(self, crs):
        """
        Returns the slices for the collection of files given
        """
        crs_axes = CRSUtil(crs).get_axes(self.session.coverage_id)

        slices = []
        timeseries = self._generate_timeseries_tuples()
        count = 1
        for tpair in timeseries:
            file_path = tpair.file.get_filepath()

            # NOTE: don't re-process any file already recorded in *.resume.json, as that is just wasted time
            if not self.resumer.is_file_imported(file_path):
                timer = Timer()

                # print which file is being analyzed
                FileUtil.print_feedback(count, len(timeseries), file_path)
                if not FileUtil.validate_file_path(file_path):
                    continue

                valid_coverage_slice = True

                try:
                    subsets = GdalAxisFiller(crs_axes,
                                             GDALGmlUtil(file_path)).fill(True)
                    subsets = self._fill_time_axis(tpair, subsets)
                except Exception as ex:
                    # If skip: true then just ignore this file from importing, else raise exception
                    FileUtil.ignore_coverage_slice_from_file_if_possible(
                        file_path, ex)
                    valid_coverage_slice = False

                if valid_coverage_slice:
                    slices.append(Slice(subsets, FileDataProvider(tpair.file)))

                timer.print_elapsed_time()

            count += 1

        return slices

    def _fill_time_axis(self, tpair, subsets):
        """
        Fills the time axis parameters
        :param TimeFileTuple tpair: the input pair
        :param list[AxisSubset] subsets: the axis subsets for the tpair
        """
        days, hours, minutes, seconds = self._get_real_step()
        number_of_days = days + hours / float(24) + minutes / float(
            60 * 24) + seconds / float(60 * 60 * 24)
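        # e.g. a time step of "1 days 6 hours" gives number_of_days = 1 + 6/24 = 1.25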
        for i in range(0, len(subsets)):
            if subsets[i].coverage_axis.axis.crs_axis is not None and subsets[
                    i].coverage_axis.axis.crs_axis.is_time_axis():
                subsets[i].coverage_axis.axis = RegularAxis(
                    subsets[i].coverage_axis.axis.label,
                    subsets[i].coverage_axis.axis.uomLabel,
                    tpair.time.to_string(), tpair.time.to_string(),
                    tpair.time.to_string(),
                    subsets[i].coverage_axis.axis.crs_axis)
                subsets[i].coverage_axis.grid_axis.resolution = number_of_days
                subsets[i].interval.low = tpair.time.to_string()
        return subsets

    def _get_coverage(self):
        """
        Returns the coverage to be used for the importer
        """
        gdal_dataset = GDALGmlUtil.open_gdal_dataset_from_any_file(
            self.session.get_files())
        crs = CRSUtil.get_compound_crs(
            [self.options['time_crs'],
             gdal_dataset.get_crs()])
        slices = self._get_slices(crs)
        fields = GdalRangeFieldsGenerator(
            gdal_dataset, self.options['band_names']).get_range_fields()
        coverage = Coverage(self.session.get_coverage_id(), slices, fields,
                            crs, gdal_dataset.get_band_gdal_type(),
                            self.options['tiling'])
        return coverage

    def _get_importer(self):
        if self.importer is None:
            self.importer = Importer(self.resumer, self._get_coverage(),
                                     self.options['wms_import'],
                                     self.options['scale_levels'])
        return self.importer

    @staticmethod
    def get_name():
        return "time_series_regular"
Example #19
class Importer:
    def __init__(self,
                 coverage,
                 insert_into_wms=False,
                 scale_levels=None,
                 grid_coverage=False):
        """
        Imports a coverage into wcst
        :param Coverage coverage: the coverage to be imported
        """
        self.coverage = coverage
        self.resumer = Resumer(coverage.coverage_id)
        self.coverage.slices = SliceRestricter(
            self.resumer.eliminate_already_imported_slices(
                self.coverage.slices)).get_slices()
        self.processed = 0
        self.total = len(coverage.slices)
        self.insert_into_wms = insert_into_wms
        self.scale_levels = scale_levels
        self.grid_coverage = grid_coverage

    def ingest(self):
        """
        Ingests the given coverage
        """
        if len(self.coverage.slices) > 0:
            if self._is_insert():
                self._initialize_coverage()

            # Insert the remaining slices
            self._insert_slices()

            if self.insert_into_wms:
                self._insert_update_into_wms()

    def get_progress(self):
        """
        Returns the progress of the import
        :rtype: tuple
        """
        if self.total == 0:
            log.warn("No slices to import.")
            return -1, -1
        return self.processed, self.total

    def _insert_slice(self, current):
        """
        Inserts one slice
        :param Slice current: the slice to be imported
        """
        current_exception = None
        current_str = ""

        for attempt in range(0, ConfigManager.retries):
            try:
                current_str = str(current)
                file = self._generate_gml_slice(current)
                subsets = self._get_update_subsets_for_slice(current)
                request = WCSTUpdateRequest(self.coverage.coverage_id,
                                            file.get_url(), subsets,
                                            ConfigManager.insitu)
                executor = ConfigManager.executor
                executor.execute(request, mock=ConfigManager.mock)
                file.release()
                self.resumer.add_imported_data(current.data_provider)
            except Exception as e:
                log.warn(
                    "\nException thrown when trying to insert slice: \n" +
                    current_str +
                    "\nRetrying; you can safely ignore the warning for now. Tried "
                    + str(attempt + 1) + " times.\n")
                current_exception = e
                sleep(ConfigManager.retry_sleep)
            else:
                break
        else:
            log.warn("\nFailed to insert slice. Attempted " +
                     str(ConfigManager.retries) + " times.")
            raise current_exception

    def get_slices_for_description(self):
        """
        Returns a list with the first slices to be used in the import description
        :rtype: list[Slice]
        """
        # Print at most ConfigManager.description_max_no_slices slices (e.g. the first 5 files)
        max_no_slices = min(ConfigManager.description_max_no_slices,
                            len(self.coverage.slices))
        return self.coverage.slices[:max_no_slices]

    def _insert_slices(self):
        """
        Insert the slices of the coverage
        """
        is_loggable = True
        is_ingest_file = True
        file_name = ""
        try:
            log_file = open(
                ConfigManager.resumer_dir_path + "/" +
                ConfigManager.ingredient_file_name + ".log", "a+")
            log_file.write(
                "\n-------------------------------------------------------------------------------------"
            )
            log_file.write("\nIngesting coverage '" +
                           self.coverage.coverage_id + "'...")
        except Exception as e:
            is_loggable = False
            log.warn(
                "\nCannot create a log file for this ingestion process; logging to console only."
            )

        for i in range(self.processed, self.total):
            try:
                # Log the time needed to send the slice (file) to the server for ingestion
                # NOTE: the wcs_extract recipe fetches the file from the server, so the file size is unknown
                if hasattr(self.coverage.slices[i].data_provider, "file"):
                    file_path = self.coverage.slices[
                        i].data_provider.file.filepath
                    file_size_in_mb = round(
                        float(os.path.getsize(file_path)) / (1000 * 1000), 2)
                    file_name = os.path.basename(file_path)
                    start_time = time.time()
                    self._insert_slice(self.coverage.slices[i])
                    end_time = time.time()
                    time_to_ingest = round(end_time - start_time, 2)
                    if time_to_ingest < 0.0000001:
                        time_to_ingest = 0.0000001
                    size_per_second = round(file_size_in_mb / time_to_ingest,
                                            2)
                    log_text = "\nFile '" + file_name + "' with size " + str(file_size_in_mb) + " MB; " \
                           "Total time to ingest " + str(time_to_ingest) + "s @ " + str(size_per_second) + " MB/s."
                    # write to console
                    log.info(log_text)
                    if is_loggable:
                        # write to log file
                        log_file.write(log_text)
                else:
                    is_ingest_file = False
                    # extract coverage from petascope to ingest a new coverage
                    start_time = time.time()
                    self._insert_slice(self.coverage.slices[i])
                    end_time = time.time()
                    time_to_ingest = round(end_time - start_time, 2)
                    log.info("\nTotal time to ingest: " + str(time_to_ingest) +
                             " s.")
            except Exception as e:
                if ConfigManager.skip:
                    log.warn("Skipped slice " + str(self.coverage.slices[i]))
                    if is_loggable and is_ingest_file:
                        log_file.write("\nSkipped file: " + file_name + ".")
                        log_file.write("\nReason: " + str(e))
                else:
                    if is_loggable and is_ingest_file:
                        log_file.write("\nError file: " + file_name + ".")
                        log_file.write("\nReason: " + str(e))
                        log_file.write("\nResult: failed.")
                        log_file.close()

                    raise e
            self.processed += 1

        log_file.write("\nResult: success.")
        log_file.close()

    def _initialize_coverage(self):
        """
        Initializes the coverage
        """
        file = self._generate_initial_gml_slice()
        request = WCSTInsertRequest(file.get_url(), False,
                                    self.coverage.pixel_data_type,
                                    self.coverage.tiling)
        executor = ConfigManager.executor
        current_insitu_value = executor.insitu
        executor.insitu = None
        executor.execute(request, mock=ConfigManager.mock)
        executor.insitu = current_insitu_value
        file.release()

        # If scale_levels is specified in the ingredients file, send queries to petascope to create downscaled collections
        if self.scale_levels:
            # Levels must be in ascending order
            sorted_list = sorted(self.scale_levels)
            # NOTE: each level is processed separately with each HTTP request
            for level in sorted_list:
                request = WCSTInsertScaleLevelsRequest(
                    self.coverage.coverage_id, level)
                executor.execute(request, mock=ConfigManager.mock)

    def _get_update_subsets_for_slice(self, slice):
        """
        Returns the given slice's interval as a list of wcst subsets
        :param slice: the slice for which to generate this
        :rtype: list[WCSTSubset]
        """
        subsets = []
        for axis_subset in slice.axis_subsets:
            low = axis_subset.interval.low
            high = axis_subset.interval.high
            # if ConfigManager.subset_correction and high is not None and low != high and type(low) != str:
            if ConfigManager.subset_correction and high is not None and low != high and type(
                    low) == str:
                # Time axis with type = str (e.g: "1970-01-01T02:03:06Z")
                time_seconds = 1
                # AnsiDate (need to change from date to seconds)
                if axis_subset.coverage_axis.axis.crs_axis.is_uom_day():
                    time_seconds = DateTimeUtil.DAY_IN_SECONDS
                low = decimal.Decimal(str(
                    arrow.get(low).float_timestamp)) + decimal.Decimal(
                        str(axis_subset.coverage_axis.grid_axis.resolution *
                            time_seconds)) / 2
                low = DateTimeUtil.get_datetime_iso(low)

                if high is not None:
                    high = decimal.Decimal(str(
                        arrow.get(high).float_timestamp)) - decimal.Decimal(
                            str(axis_subset.coverage_axis.grid_axis.resolution
                                * time_seconds)) / 2
                    high = DateTimeUtil.get_datetime_iso(high)

            elif ConfigManager.subset_correction and high is not None and low != high and type(
                    low) != str:
                # regular axes (e.g: latitude, longitude, index1d)
                low = decimal.Decimal(str(low)) + decimal.Decimal(
                    str(axis_subset.coverage_axis.grid_axis.resolution)) / 2
                if high is not None:
                    high = decimal.Decimal(str(high)) - decimal.Decimal(
                        str(axis_subset.coverage_axis.grid_axis.resolution)) / 2
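                # e.g. low=0, high=10, resolution=0.5 -> corrected low=0.25, high=9.75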

            subsets.append(
                WCSTSubset(axis_subset.coverage_axis.axis.label, low, high))
        return subsets

    def _get_update_crs(self, slice, crs):
        """
        Returns the crs corresponding to the axes that are data bound
        :param slice: the slice for which the gml should be created
        :param crs: the crs of the coverage
        :return: String
        """
        crsAxes = []
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                crsAxes.append(axis_subset.coverage_axis.axis.crs_axis)
        crsUtil = CRSUtil(crs)
        return crsUtil.get_crs_for_axes(crsAxes)

    def _generate_gml_slice(self, slice):
        """
        Generates the gml for a regular slice
        :param slice: the slice for which the gml should be created
        :rtype: File
        """
        metadata_provider = MetadataProvider(
            self.coverage.coverage_id, self._get_update_axes(slice),
            self.coverage.range_fields,
            self._get_update_crs(slice,
                                 self.coverage.crs), None, self.grid_coverage)
        data_provider = slice.data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _get_update_axes(self, slice):
        """
        Returns the axes for the slices that are bound to the data (e.g. Lat and Long for a 2-D raster)
        :param slice: the slice for which the gml should be created
        :rtype: dict[Axis, GridAxis]
        """
        axes = OrderedDict()
        for axis_subset in slice.axis_subsets:
            if axis_subset.coverage_axis.data_bound:
                axes[axis_subset.coverage_axis.
                     axis] = axis_subset.coverage_axis.grid_axis
        return axes

    def _generate_initial_gml_slice(self):
        """
        Returns the initial slice in gml format
        :rtype: File
        """
        return self._generate_initial_gml_db()

    def _generate_initial_gml_db(self):
        """
        Generates the initial slice in gml for importing using the database method and returns the gml for it
        :rtype: File
        """
        # Transform the axes domains such that only a point is defined.
        # For the first slice we need to import a single point, which will then be updated with the real data
        axes_map = OrderedDict()
        for axis, grid_axis in self.coverage.get_insert_axes().items():
            if axis.coefficient is not None:
                assert type(axis.coefficient
                            ) == list, "Axis coefficients not of type list."
                assert len(
                    axis.coefficient) > 0, "The list of coefficients is empty."
                # Get the first coefficient of an irregular axis to create an initial slice
                axis = IrregularAxis(axis.label, axis.uomLabel, axis.low,
                                     axis.high, axis.origin,
                                     [axis.coefficient[0]], axis.crs_axis)
            axes_map[axis] = GridAxis(grid_axis.order, grid_axis.label,
                                      grid_axis.resolution, 0, 0)
        metadata_provider = MetadataProvider(
            self.coverage.coverage_id, axes_map, self.coverage.range_fields,
            self.coverage.crs, self.coverage.metadata, self.grid_coverage)
        tuple_list = ",".join(['0'] * len(self.coverage.range_fields))
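        # e.g. a coverage with 3 range fields yields the dummy point "0,0,0"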
        data_provider = TupleListDataProvider(tuple_list)
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        return file

    def _generate_initial_gml_insitu(self):
        """
        Generates the initial slice in gml for importing using the insitu method and returns the gml file for it
        :rtype: File
        """
        metadata_provider = MetadataProvider(self.coverage.coverage_id,
                                             self.coverage.get_insert_axes(),
                                             self.coverage.range_fields,
                                             self.coverage.crs,
                                             self.coverage.metadata,
                                             self.grid_coverage)
        data_provider = self.coverage.slices[0].data_provider
        file = Mediator(metadata_provider, data_provider).get_gml_file()
        self.processed += 1
        self.resumer.add_imported_data(data_provider)
        return file

    def _insert_update_into_wms(self):
        """
        Inserts or updates the coverage in the WMS service
        """
        try:
            # First, check in the WMS GetCapabilities response whether the layer name (coverage id) exists
            request = WMSTGetCapabilities()
            response = ConfigManager.executor.execute(request)

            root_element = etree.fromstring(response)
            namespace = {"wms": "http://www.opengis.net/wms"}
            exist = root_element.xpath(
                "//wms:Capability/wms:Layer/wms:Layer/wms:Name/text()='" +
                self.coverage.coverage_id + "'",
                namespaces=namespace)

            # WMS layer does not exist; insert a new WMS layer from the imported coverage
            if exist is False:
                request = WMSTFromWCSInsertRequest(self.coverage.coverage_id,
                                                   False)
            else:
                # WMS layer exists; update the WMS layer from the updated coverage
                request = WMSTFromWCSUpdateRequest(self.coverage.coverage_id,
                                                   False)
            ConfigManager.executor.execute(request, mock=ConfigManager.mock)
        except Exception as e:
            log.error(
                "Exception thrown when importing in WMS. Please try to reimport in WMS manually."
            )
            raise e

    def _is_insert(self):
        """
        Returns true if the coverage should be inserted, false if only updates are needed
        :rtype: bool
        """
        cov = CoverageUtil(self.coverage.coverage_id)
        return not cov.exists()
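
For reference, a minimal usage sketch of this Importer (the coverage object is
hypothetical; in practice a recipe builds it, as in Example #18 above):

    importer = Importer(coverage, insert_into_wms=True, scale_levels=[2, 4])
    importer.ingest()
    processed, total = importer.get_progress()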