Ejemplo n.º 1
0
    def __update_data_from_catalog_config(self, catalog_config):
        """Merge a catalog configuration into this catalog and refresh its data

        The given configuration is merged into ``self.catalog_config`` with
        ``update_nested_dict``; the merged ``model`` entry is then formatted
        and handed to ``self.update_data``.

        :param catalog_config: catalog config, from yml stac_config[catalogs]
        :type catalog_config: dict
        :returns: always ``True``
        :rtype: bool
        :raises KeyError: if ``catalog_config`` has no ``model`` entry
        """
        input_model = catalog_config["model"]

        self.catalog_config = update_nested_dict(
            self.catalog_config, catalog_config
        )

        # Formatting context: a private copy of the STAC configuration plus a
        # "catalog" entry made of the input model, the root and the URL.
        format_context = copy.deepcopy(self.stac_config)
        catalog_args = dict(input_model)
        catalog_args.update({"root": self.root, "url": self.url})
        # defaultdict(str) resolves any missing key to "" on item lookup
        format_context["catalog"] = defaultdict(str, catalog_args)

        self.update_data(
            format_dict_items(self.catalog_config["model"], **format_context)
        )

        return True
Ejemplo n.º 2
0
    def set_stac_product_type_by_id(self, product_type, **kwargs):
        """Update this catalog's data for the given product type

        Fetches the matching collection through ``StacCollection``, formats the
        ``product_type`` catalog model with it, stores the result with
        ``self.update_data`` and records the product type in
        ``self.search_args``.

        :param product_type: product type
        :type product_type: str
        :param dict kwargs: accepted but not read by this method
        :returns: the formatted product type catalog model
        :raises Exception: any error raised while formatting is logged and
            re-raised unchanged
        """
        stac_collection = StacCollection(
            url=self.url,
            stac_config=self.stac_config,
            provider=self.provider,
            eodag_api=self.eodag_api,
            root=self.root,
        )
        collection = stac_collection.get_collection_by_id(product_type)

        product_type_model = copy.deepcopy(
            self.stac_config["catalogs"]["product_type"]["model"]
        )

        # Formatting context: STAC config + current catalog data + collection
        format_context = copy.deepcopy(self.stac_config)
        format_context["catalog"] = defaultdict(str, **self.data)
        format_context["collection"] = collection

        try:
            formatted_catalog = format_dict_items(
                product_type_model, **format_context
            )
        except Exception:
            logger.error("Could not format product_type catalog")
            raise

        self.update_data(formatted_catalog)

        # Remember the product type for subsequent searches
        self.search_args.update({"product_type": product_type})

        return formatted_catalog
Ejemplo n.º 3
0
    def __get_item_list(self, search_results, catalog):
        """Convert EODAG search results into a list of STAC item dicts

        The item model is filtered once, using the product type of the first
        result, and then applied to every product.

        :param search_results: EODAG search results
        :type search_results: :class:`eodag.api.search_result.SearchResult`
        :param catalog: STAC catalog dict used for parsing item metadata
        :type catalog: dict
        :returns: STAC item dicts list (empty when there are no results)
        :rtype: list
        """
        if len(search_results) < 1:
            return []

        item_model = self.__filter_item_model_properties(
            self.stac_config["item"], search_results[0].product_type
        )

        stac_items = []
        for product in search_results:
            # Step 1: resolve the jsonpath expressions against the product
            parsed_item = jsonpath_parse_dict_items(
                item_model, {"product": product.__dict__}
            )

            # Step 2: resolve the format strings, using a fresh copy of the
            # STAC configuration extended with the catalog and the item
            format_context = copy.deepcopy(self.stac_config)
            format_context.update({"catalog": catalog, "item": parsed_item})
            formatted_item = format_dict_items(parsed_item, **format_context)
            formatted_item["bbox"] = list(map(float, formatted_item["bbox"]))

            # Step 3: drop the empty properties before collecting the item
            stac_items.append(
                self.__filter_item_properties_values(formatted_item)
            )

        return stac_items
Ejemplo n.º 4
0
    def get_stac_extension(url, stac_config, extension, **kwargs):
        """Parse STAC extension from config and return as dict

        :param url: requested URL
        :type url: str
        :param stac_config: STAC configuration from stac.yml conf file
        :type stac_config: dict
        :param extension: extension name
        :type extension: str
        :param dict kwargs: Additional variables needed for parsing extension;
            only ``properties`` is read here
        :returns: STAC extension as dictionary (the formatted model; the model
            is ``{}`` when the extension is not configured)
        :rtype: dict
        """
        # Copy only the requested extension model: copying the whole
        # configuration just to read one entry is wasted work.
        extension_model = copy.deepcopy(
            stac_config.get("extensions", {}).get(extension, {})
        )

        # parse f-strings, working on a private copy of the configuration so
        # that the caller's stac_config is not modified by the added key
        format_args = copy.deepcopy(stac_config)
        format_args["extension"] = {
            "url": url,
            "properties": kwargs.get("properties", {}),
        }

        return format_dict_items(extension_model, **format_args)
Ejemplo n.º 5
0
    def set_stac_location_by_id(self, location, catalog_name):
        """Update this catalog with the features matching a location

        The shapefile and attribute to filter on are read from the
        ``<catalog_name>_list`` entry of ``stac_config["catalogs"]``. The
        geometries of all matching features are merged, the catalog data is
        updated, and the merged geometry is stored in ``self.search_args``.

        :param location: feature attribute value for shp filtering
        :type location: str
        :param catalog_name: catalog/location name
        :type catalog_name: str
        :returns: updated catalog, or ``{}`` (after a warning) when the
            configuration is incomplete or no feature matches
        :rtype: dict
        """
        list_key = catalog_name + "_list"

        if not (list_key in self.stac_config["catalogs"]):
            logger.warning(
                "no entry found for {}'s list in location_config".format(
                    catalog_name))
            return {}
        location_config = self.stac_config["catalogs"][list_key]

        # Both keys are mandatory; report the first one that is missing
        missing_key = next(
            (key for key in ("path", "attr") if key not in location_config),
            None,
        )
        if missing_key is not None:
            logger.warning(
                "no {} key found for {}'s list in location_config".format(
                    missing_key, catalog_name))
            return {}
        path, attr = location_config["path"], location_config["attr"]

        # Collect the geometry of every record whose attribute matches
        geom_hits = []
        with shapefile.Reader(path) as shp:
            for shaperec in shp.shapeRecords():
                if shaperec.record.as_dict().get(attr, None) == location:
                    geom_hits.append(shape(shaperec.shape))

        if not geom_hits:
            logger.warning("no feature found in %s matching %s=%s" %
                           (path, attr, location))
            return {}

        geom = unary_union(geom_hits)

        # NOTE(review): the "country" model is used whatever catalog_name is
        # given - confirm this is intended
        location_model = copy.deepcopy(
            self.stac_config["catalogs"]["country"]["model"])

        # Formatting context: STAC config + current catalog data + feature
        format_context = copy.deepcopy(self.stac_config)
        format_context["catalog"] = defaultdict(str, **self.data)
        format_context["feature"] = defaultdict(
            str, {"geometry": geom, "id": location})
        formatted_catalog = format_dict_items(location_model, **format_context)

        self.update_data(formatted_catalog)

        # Remember the geometry for subsequent searches
        self.search_args.update({"geom": geom})

        return formatted_catalog
Ejemplo n.º 6
0
 def get_collections(self, *args, **kwargs):
     """Get the collection to which the product belongs

     Three cases are handled:

     * no ``productType`` in ``kwargs`` and no product type definition
       parameters: every collection found in the provider configuration is
       returned;
     * provider ``peps``: ``S2_MSI_L1C`` products are looked up in ``S2ST``
       only when the requested date is later than 2016-12-05, otherwise in
       both ``S2`` and ``S2ST``; other product types use the ``collection``
       of the product type definition parameters;
     * any other provider: the provider-level ``collection`` if set, else
       the product type definition's ``collection``, else the product type.

     :param args: ignored
     :param dict kwargs: search criteria; ``productType`` and
         ``startTimeFromAscendingNode`` are read here
     :returns: collection names
     :rtype: tuple
     """
     # See https://earth.esa.int/web/sentinel/missions/sentinel-2/news/-
     # /asset_publisher/Ac0d/content/change-of
     # -format-for-new-sentinel-2-level-1c-products-starting-on-6-december
     product_type = kwargs.get("productType")

     if product_type is None and not self.product_type_def_params:
         collections = set()
         collection = getattr(self.config, "collection", None)
         if collection is None:
             try:
                 for configured_type, product_config in self.config.products.items():
                     if configured_type != GENERIC_PRODUCT_TYPE:
                         collections.add(product_config["collection"])
                     else:
                         collections.add(
                             format_dict_items(product_config, **kwargs).get(
                                 "collection", ""
                             )
                         )
             except KeyError:
                 # a product without "collection" stops the scan here
                 collections.add("")
         else:
             collections.add(collection)
         return tuple(collections)

     if self.provider == "peps":
         if product_type != "S2_MSI_L1C":
             return (self.product_type_def_params.get("collection", ""),)

         date = kwargs.get("startTimeFromAscendingNode")
         match = None
         if date is not None:
             match = re.match(
                 r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})", date
             )
         # Without a date criterion, or with a date that does not start with
         # YYYY-MM-DD, query all the collections known for providing L1C
         # products instead of failing on the unmatched pattern.
         if match is None:
             return ("S2", "S2ST")

         requested_day = (
             int(match.group("year")),
             int(match.group("month")),
             int(match.group("day")),
         )
         if requested_day > (2016, 12, 5):
             return ("S2ST",)
         return ("S2", "S2ST")

     collection = getattr(self.config, "collection", None)
     if collection is None:
         collection = (
             self.product_type_def_params.get("collection", None) or product_type
         )
     # a list of collections is returned as a tuple of its elements
     if isinstance(collection, list):
         return tuple(collection)
     return (collection,)
Ejemplo n.º 7
0
    def __get_collection_list(self, filters=None):
        """Build the list of STAC collections, one per product type

        :param filters: additional filters for collections search
        :type filters: dict
        :returns: STAC collection dicts list
        :rtype: list
        """
        collection_model = copy.deepcopy(self.stac_config["collection"])

        collections = []
        for product_type in self.__get_product_types(filters):
            # Use the configured provider, or fall back to the provider of
            # the first search plugin returned for this product type
            provider_name = self.provider
            if not provider_name:
                search_plugins = (
                    self.eodag_api._plugins_manager.get_search_plugins(
                        product_type=product_type["ID"]
                    )
                )
                provider_name = next(search_plugins).provider

            # Resolve the jsonpath expressions
            jsonpath_context = {
                "product_type": product_type,
                "provider":
                self.eodag_api.providers_config[provider_name].__dict__,
            }
            parsed_collection = jsonpath_parse_dict_items(
                collection_model, jsonpath_context
            )

            # Resolve the format strings, on a fresh copy of the STAC config
            format_context = copy.deepcopy(self.stac_config)
            collection_args = dict(parsed_collection)
            collection_args.update({"url": self.url, "root": self.root})
            format_context["collection"] = collection_args

            collections.append(
                format_dict_items(parsed_collection, **format_context)
            )

        return collections
Ejemplo n.º 8
0
    def get_stac_item_from_product(self, product):
        """Build a STAC item from an EODAG product and store it as own data

        :param product: EODAG product
        :type product: :class:`eodag.api.product._product.EOProduct`
        :returns: STAC item, as returned by ``self.as_dict()`` once the item
            has been passed to ``self.update_data``
        """
        product_type = product.product_type

        item_model = self.__filter_item_model_properties(
            self.stac_config["item"], product_type
        )

        # Catalog of the product type, rooted at the URL preceding "/items"
        base_url = self.url.split("/items")[0]
        catalog = StacCatalog(
            url=base_url,
            stac_config=self.stac_config,
            root=self.root,
            provider=self.provider,
            eodag_api=self.eodag_api,
            catalogs=[product_type],
        )

        # Resolve the jsonpath expressions against the product
        parsed_item = jsonpath_parse_dict_items(
            item_model, {"product": product.__dict__}
        )

        # Resolve the format strings: STAC config + catalog + item
        format_context = copy.deepcopy(self.stac_config)
        catalog_args = dict(catalog.as_dict())
        catalog_args.update({"url": catalog.url, "root": catalog.root})
        format_context["catalog"] = catalog_args
        format_context["item"] = parsed_item
        formatted_item = format_dict_items(parsed_item, **format_context)
        formatted_item["bbox"] = list(map(float, formatted_item["bbox"]))

        # Drop the empty properties, then publish the item as own data
        self.update_data(self.__filter_item_properties_values(formatted_item))
        return self.as_dict()
Ejemplo n.º 9
0
    def set_stac_cloud_cover_by_id(self, cloud_cover, **kwargs):
        """Update this catalog with the given maximum cloud cover

        The ``cloud_cover`` catalog model is formatted, stored with
        ``self.update_data``, and the value is recorded in
        ``self.search_args`` under ``cloudCover``.

        :param cloud_cover: cloud_cover number
        :type cloud_cover: str
        :param dict kwargs: accepted but not read by this method
        :returns: updated catalog
        :rtype: dict
        """
        cloud_cover_model = copy.deepcopy(
            self.stac_config["catalogs"]["cloud_cover"]["model"]
        )

        # Formatting context: STAC config + current catalog data + value
        format_context = copy.deepcopy(self.stac_config)
        format_context["catalog"] = defaultdict(str, **self.data)
        format_context["cloud_cover"] = cloud_cover

        formatted_catalog = format_dict_items(
            cloud_cover_model, **format_context
        )
        self.update_data(formatted_catalog)

        # Remember the cloud cover for subsequent searches
        self.search_args.update({"cloudCover": cloud_cover})

        return formatted_catalog
Ejemplo n.º 10
0
    def get_product_type_def_params(self, product_type, **kwargs):
        """Get the provider product type definition parameters

        Returns the configured definition of ``product_type`` when there is
        one. Otherwise the generic definition, if configured, is formatted
        with ``kwargs`` and its falsy values are dropped. ``{}`` is returned
        when neither is available.

        :param product_type: product type to look up in the provider config
        :param dict kwargs: values used to format the generic definition
        :returns: product type definition parameters
        :rtype: dict
        """
        products = self.config.products

        if product_type in products:
            logger.debug(
                "Getting provider product type definition parameters for %s",
                product_type,
            )
            return products[product_type]

        if GENERIC_PRODUCT_TYPE not in products:
            return {}

        logger.debug(
            "Getting genric provider product type definition parameters for %s",
            product_type,
        )
        formatted_generic = format_dict_items(
            products[GENERIC_PRODUCT_TYPE], **kwargs
        )
        # keep only the parameters that got a non-empty value
        return {
            name: value for name, value in formatted_generic.items() if value
        }
Ejemplo n.º 11
0
    def set_stac_date(self, datetime_min, datetime_max, catalog_model):
        """Update catalog data and search arguments for a date interval

        :param datetime_min: date min of interval
        :type datetime_min: :class:`datetime.datetime`
        :param datetime_max: date max of interval
        :type datetime_max: :class:`datetime.datetime`
        :param catalog_model: catalog model to use, from yml stac_config[catalogs]
        :type catalog_model: dict
        :returns: updated catalog
        :rtype: dict
        """
        # Formatting context: STAC config + current catalog data + date parts
        format_context = copy.deepcopy(self.stac_config)
        format_context["catalog"] = defaultdict(str, **self.data)

        iso_min = datetime_min.isoformat()
        iso_max = datetime_max.isoformat()
        date_args = {
            "year": datetime_min.year,
            "month": datetime_min.month,
            "day": datetime_min.day,
            # any "+00:00" offset is stripped and a "Z" suffix appended
            "min": iso_min.replace("+00:00", "") + "Z",
            "max": iso_max.replace("+00:00", "") + "Z",
        }
        format_context["date"] = defaultdict(str, date_args)

        formatted_catalog = format_dict_items(catalog_model, **format_context)
        self.update_data(formatted_catalog)

        # Searches only use the date part (text before "T") of each bound
        self.search_args.update({
            "dtstart": iso_min.split("T")[0],
            "dtend": iso_max.split("T")[0],
        })
        return formatted_catalog
Ejemplo n.º 12
0
    def query(self,
              product_type=None,
              items_per_page=None,
              page=None,
              count=True,
              **kwargs):
        """Search for data on USGS catalogues

        .. versionchanged::
           2.2.0

                * Based on usgs library v0.3.0 which now uses M2M API. The library
                  is used for both search & download

        .. versionchanged::
            1.0

                * ``product_type`` is no longer mandatory

        :param product_type: overwritten by ``kwargs["productType"]`` before
            being used
        :param items_per_page: maximum number of results requested
        :param page: 1-based page number; when it or ``items_per_page`` is
            ``None`` the search starts at the first result
        :param count: not used by this method
        :param dict kwargs: search criteria; ``productType``,
            ``startTimeFromAscendingNode``, ``completionTimeFromAscendingNode``
            and ``geometry`` are read here
        :returns: the products found and the total number of results;
            ``([], 0)`` when no ``productType`` is given
        :rtype: tuple
        :raises AuthenticationError: if the USGS login fails
        """
        # NOTE(review): the ``product_type`` argument is discarded here; only
        # kwargs["productType"] is honoured - confirm this is intended
        product_type = kwargs.get("productType")
        if product_type is None:
            return [], 0
        try:
            api.login(
                self.config.credentials["username"],
                self.config.credentials["password"],
                save=True,
            )
        except USGSError:
            raise AuthenticationError(
                "Please check your USGS credentials.") from None

        final = []
        # From here on the session is open: log out whatever happens, so an
        # unexpected error does not leave it open.
        try:
            product_type_def_params = self.config.products.get(
                product_type, self.config.products[GENERIC_PRODUCT_TYPE])
            usgs_dataset = format_dict_items(product_type_def_params,
                                             **kwargs)["dataset"]
            start_date = kwargs.pop("startTimeFromAscendingNode", None)
            end_date = kwargs.pop("completionTimeFromAscendingNode", None)
            geom = kwargs.pop("geometry", None)
            footprint = {}
            if hasattr(geom, "bounds"):
                (
                    footprint["lonmin"],
                    footprint["latmin"],
                    footprint["lonmax"],
                    footprint["latmax"],
                ) = geom.bounds
            else:
                footprint = geom

            if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
                lower_left = {
                    "longitude": footprint["lonmin"],
                    "latitude": footprint["latmin"],
                }
                upper_right = {
                    "longitude": footprint["lonmax"],
                    "latitude": footprint["latmax"],
                }
            else:
                lower_left, upper_right = None, None

            # Both pagination arguments default to None, in which case no
            # offset can be computed: start at the first result.
            if page is None or items_per_page is None:
                starting_number = 1
            else:
                starting_number = 1 + (page - 1) * items_per_page

            try:
                results = api.scene_search(
                    usgs_dataset,
                    start_date=start_date,
                    end_date=end_date,
                    ll=lower_left,
                    ur=upper_right,
                    max_results=items_per_page,
                    starting_number=starting_number,
                )

                # Same method as in base.py, Search.__init__()
                # Prepare the metadata mapping
                # Do a shallow copy, the structure is flat enough for this to be sufficient
                metas = DEFAULT_METADATA_MAPPING.copy()
                # Update the defaults with the mapping value. This will add any new key
                # added by the provider mapping that is not in the default metadata.
                # A deepcopy is done to prevent self.config.metadata_mapping from being
                # modified when metas[metadata] is a list and is modified
                metas.update(copy.deepcopy(self.config.metadata_mapping))
                metas = mtd_cfg_as_jsonpath(metas)

                for result in results["data"]["results"]:

                    result["productType"] = usgs_dataset

                    product_properties = properties_from_json(result, metas)

                    final.append(
                        EOProduct(
                            productType=product_type,
                            provider=self.provider,
                            properties=product_properties,
                            geometry=footprint,
                        ))
            except USGSError as e:
                # best effort: a failing search yields the products built so far
                logger.warning(
                    "Product type %s does not exist on USGS EE catalog",
                    usgs_dataset,
                )
                logger.warning("Skipping error: %s", e)
        finally:
            api.logout()

        if final:
            # parse total_results
            path_parsed = parse(
                self.config.pagination["total_items_nb_key_path"])
            total_results = path_parsed.find(results["data"])[0].value
        else:
            total_results = 0

        return final, total_results
Ejemplo n.º 13
0
    def get_stac_items(self, search_results, catalog):
        """Build STAC items from EODAG search results

        Pagination attributes (``numberMatched``, ``numberReturned``,
        ``next``, ``timeStamp``) are set on ``search_results`` before the
        items model is parsed. ``catalog["url"]`` is rewritten in place to a
        static catalog URL before the features are built.

        :param search_results: EODAG search results
        :type search_results: :class:`eodag.api.search_result.SearchResult`
        :param catalog: STAC catalog dict used for parsing item metadata
        :type catalog: dict
        :returns: items dictionary, as returned by ``self.as_dict()``
        :rtype: dict
        """
        # URLs always use "/" separators: os.path.join would insert "\\" on
        # Windows, so the POSIX flavour is used explicitly.
        import posixpath

        items_model = copy.deepcopy(self.stac_config["items"])

        search_results.numberMatched = search_results.properties[
            "totalResults"]
        search_results.numberReturned = len(search_results)

        next_page = search_results.properties["page"] + 1

        # next page link
        if "?" in self.url:
            # search endpoint: use page url as self link
            for link in items_model["links"]:
                if link["rel"] == "self":
                    link["href"] = catalog["url"]
            if "page=" not in self.url:
                search_results.next = "%s&page=%s" % (self.url, next_page)
            else:
                # replace the page number already present in the query
                search_results.next = re.sub(
                    r"^(.*)(page=[0-9]+)(.*)$",
                    r"\1page=%s\3" % next_page,
                    self.url,
                )
        else:
            search_results.next = "%s?page=%s" % (self.url, next_page)

        search_results.timeStamp = (datetime.datetime.now(
            datetime.timezone.utc).isoformat().replace("+00:00", "") + "Z")

        # parse jsonpath
        items = jsonpath_parse_dict_items(
            items_model, {"search_results": search_results.__dict__})
        # parse f-strings
        format_args = copy.deepcopy(self.stac_config)
        format_args["catalog"] = catalog
        items = format_dict_items(items, **format_args)

        # last page: remove next page link
        if (search_results.properties["itemsPerPage"] *
                search_results.properties["page"] >=
                search_results.properties["totalResults"]):
            items["links"] = [
                link for link in items["links"] if link["rel"] != "next"
            ]

        # provide static catalog to build features
        if "search?" in catalog["url"]:
            catalog["url"] = posixpath.join(
                catalog["url"].split("search?")[0],
                "collections",
                catalog["id"],
            )
        else:
            catalog["url"] = catalog["url"].split("?")[0]
        items["features"] = self.__get_item_list(search_results, catalog)

        self.update_data(items)
        return self.as_dict()