Example #1
    def query(self, product_type=None, **kwargs):
        """Search for data on USGS catalogues

        .. versionchanged::
            1.0

                * ``product_type`` is no longer mandatory
        """
        product_type = kwargs.get("productType")
        if product_type is None:
            return [], 0
        api.login(
            self.config.credentials["username"],
            self.config.credentials["password"],
            save=True,
        )
        usgs_dataset = self.config.products[product_type]["dataset"]
        usgs_catalog_node = self.config.products[product_type]["catalog_node"]
        start_date = kwargs.pop("startTimeFromAscendingNode", None)
        end_date = kwargs.pop("completionTimeFromAscendingNode", None)
        footprint = kwargs.pop("geometry", None)

        # Configuration to generate the download url of search results
        result_summary_pattern = re.compile(
            r"^ID: .+, Acquisition Date: .+, Path: (?P<path>\d+), Row: (?P<row>\d+)$"  # noqa
        )
        # See https://pyformat.info/, section "Padding and aligning strings", to
        # understand {path:0>3} and {row:0>3}.
        # It roughly means: 'if the string that will be passed as "path" has length < 3,
        # prepend as many "0"s as needed to reach length 3', and the same for "row"
        dl_url_pattern = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz"
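        # For example (hypothetical values): path="7", row="12", entity="X"
        # with base_url="https://storage" would yield
        # "https://storage/L8/007/012/X.tar.bz"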

        final = []
        if footprint and len(footprint) == 4:  # a rectangle (or bbox)
            lower_left = {
                "longitude": footprint["lonmin"],
                "latitude": footprint["latmin"],
            }
            upper_right = {
                "longitude": footprint["lonmax"],
                "latitude": footprint["latmax"],
            }
        else:
            lower_left, upper_right = None, None
        try:
            results = api.search(
                usgs_dataset,
                usgs_catalog_node,
                start_date=start_date,
                end_date=end_date,
                ll=lower_left,
                ur=upper_right,
            )

            for result in results["data"]["results"]:
                r_lower_left = result["spatialFootprint"]["coordinates"][0][0]
                r_upper_right = result["spatialFootprint"]["coordinates"][0][2]
                summary_match = result_summary_pattern.match(
                    result["summary"]).groupdict()
                result["geometry"] = geometry.box(r_lower_left[0],
                                                  r_lower_left[1],
                                                  r_upper_right[0],
                                                  r_upper_right[1])

                # Same method as in base.py, Search.__init__()
                # Prepare the metadata mapping
                # Do a shallow copy, the structure is flat enough for this to be sufficient
                metas = DEFAULT_METADATA_MAPPING.copy()
                # Update the defaults with the provider mapping. This adds any new key
                # from the provider mapping that is not in the default metadata.
                # A deepcopy prevents self.config.metadata_mapping from being modified
                # when metas[metadata] is a list that is later mutated
                metas.update(copy.deepcopy(self.config.metadata_mapping))
                metas = mtd_cfg_as_jsonpath(metas)

                result["productType"] = usgs_dataset

                product_properties = properties_from_json(result, metas)

                if getattr(self.config, "product_location_scheme",
                           "https") == "file":
                    base_url = "file://"
                else:
                    base_url = self.config.google_base_url.rstrip("/")
                product_properties["downloadLink"] = dl_url_pattern.format(
                    base_url=base_url,
                    entity=result["entityId"],
                    **summary_match)

                final.append(
                    EOProduct(
                        productType=product_type,
                        provider=self.provider,
                        properties=product_properties,
                        geometry=footprint,
                    ))
        except USGSError as e:
            logger.debug(
                "Product type %s does not exist on catalogue %s",
                usgs_dataset,
                usgs_catalog_node,
            )
            logger.debug("Skipping error: %s", e)
        api.logout()
        return final, len(final)
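
A hypothetical call site for this method, where ``plugin`` stands for an instance of this USGS search plugin (the product type key, dates, and bounding box below are illustrative assumptions):

products, count = plugin.query(
    productType="LANDSAT_TM_C1",  # assumed product type key
    startTimeFromAscendingNode="2020-01-01",
    completionTimeFromAscendingNode="2020-02-01",
    geometry={"lonmin": 1.0, "latmin": 43.0, "lonmax": 2.0, "latmax": 44.0},
)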
Example #2
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download method for AWS S3 API.

        :param product: The EO product to download
        :type product: :class:`~eodag.api.product.EOProduct`
        :param auth: (optional) The configuration of a plugin of type Authentication
        :type auth: :class:`~eodag.config.PluginConfig`
        :param progress_callback: (optional) A method or a callable object
                                  which takes a current size and a maximum
                                  size as inputs and handles progress bar
                                  creation and update to give the user
                                  feedback on the download progress
        :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
        :return: The absolute path to the downloaded product in the local filesystem
        :rtype: str
        """
        product_conf = self.config.products.get(product.product_type, {})

        build_safe = product_conf.get("build_safe", False)

        # extra metadata needed for SAFE product
        if build_safe and "fetch_metadata" in product_conf:
            fetch_format = product_conf["fetch_metadata"]["fetch_format"]
            update_metadata = product_conf["fetch_metadata"]["update_metadata"]
            fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
                **product.properties)
            if fetch_format == "json":
                logger.info("Fetching extra metadata from %s", fetch_url)
                resp = requests.get(fetch_url)
                json_resp = resp.json()
                update_metadata = mtd_cfg_as_jsonpath(update_metadata)
                update_metadata = properties_from_json(json_resp,
                                                       update_metadata)
                product.properties.update(update_metadata)
            else:
                logger.warning(
                    "SAFE metadata fetch format %s not implemented",
                    fetch_format)
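
        # A hypothetical "fetch_metadata" product configuration driving the
        # branch above could look like this (all keys and values here are
        # illustrative assumptions, sketched in YAML form):
        #
        #   fetch_metadata:
        #     fetch_format: json
        #     fetch_url: "https://example.com/products/{id}/metadata"
        #     update_metadata:
        #       cloudCover: "$.properties.cloudCover"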

        bucket_names_and_prefixes = [self.get_bucket_name_and_prefix(product)]
        # add complementary urls
        for complementary_url_key in product_conf.get("complementary_url_key",
                                                      []):
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, product.properties[complementary_url_key]))

        # prepare download & create dirs
        product_local_path, record_filename = self._prepare_download(product)
        if not product_local_path or not record_filename:
            return product_local_path
        product_local_path = product_local_path.replace(".zip", "")
        # remove existing incomplete file
        if os.path.isfile(product_local_path):
            os.remove(product_local_path)
        # create product dest dir
        if not os.path.isdir(product_local_path):
            os.makedirs(product_local_path)

        # fall back to a default progress callback when none is provided
        if progress_callback is None:
            progress_callback = get_progress_callback()

        with tqdm(
                total=len(bucket_names_and_prefixes),
                unit="parts",
                desc="Downloading product parts",
        ) as bar:

            for bucket_name, prefix in bucket_names_and_prefixes:
                # connect to aws s3
                access_key, access_secret = auth
                s3 = boto3.resource(
                    "s3",
                    aws_access_key_id=access_key,
                    aws_secret_access_key=access_secret,
                )
                bucket = s3.Bucket(bucket_name)

                total_size = sum(
                    p.size
                    for p in bucket.objects.filter(Prefix=prefix,
                                                   RequestPayer="requester"))
                progress_callback.max_size = total_size
                for product_chunk in bucket.objects.filter(
                        Prefix=prefix, RequestPayer="requester"):
                    chunck_rel_path = self.get_chunck_dest_path(
                        product,
                        product_chunk,
                        build_safe=build_safe,
                        dir_prefix=prefix)
                    chunck_abs_path = os.path.join(product_local_path,
                                                   chunck_rel_path)
                    chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                    if not os.path.isdir(chunck_abs_path_dir):
                        os.makedirs(chunck_abs_path_dir)

                    if not os.path.isfile(chunck_abs_path):
                        bucket.download_file(
                            product_chunk.key,
                            chunck_abs_path,
                            ExtraArgs={"RequestPayer": "requester"},
                            Callback=progress_callback,
                        )
                bar.update(1)

        # finalize safe product
        if build_safe and "S2_MSI" in product.product_type:
            self.finalize_s2_safe_product(product_local_path)

        # save hash/record file
        with open(record_filename, "w") as fh:
            fh.write(product.remote_location)
        logger.debug("Download recorded in %s", record_filename)

        return product_local_path
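
boto3 invokes the ``Callback`` passed to ``download_file`` with the number of bytes transferred for each chunk. A minimal sketch of a compatible callback object (eodag's real ``ProgressCallback`` is richer; the ``max_size`` attribute mirrors how the method above sets the expected total):

class SimpleProgressCallback:
    """Minimal stand-in for a download progress callback (illustration only)."""

    def __init__(self):
        self.max_size = 0  # set by the download method before transfers start
        self.transferred = 0

    def __call__(self, bytes_amount):
        # boto3 calls this with the byte count of each transferred chunk
        self.transferred += bytes_amount
        print("%d/%d bytes" % (self.transferred, self.max_size), end="\r")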
Example #3
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download method for AWS S3 API.

        :param product: The EO product to download
        :type product: :class:`~eodag.api.product.EOProduct`
        :param auth: (optional) The configuration of a plugin of type Authentication
        :type auth: :class:`~eodag.config.PluginConfig`
        :param progress_callback: (optional) A method or a callable object
                                  which takes a current size and a maximum
                                  size as inputs and handles progress bar
                                  creation and update to give the user
                                  feedback on the download progress
        :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
        :return: The absolute path to the downloaded product in the local filesystem
        :rtype: str
        """
        product_conf = getattr(self.config, "products",
                               {}).get(product.product_type, {})

        build_safe = product_conf.get("build_safe", False)

        # product conf overrides provider conf for "flatten_top_dirs"
        flatten_top_dirs = product_conf.get(
            "flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
                                        False))

        # extra metadata needed for SAFE product
        if build_safe and "fetch_metadata" in product_conf:
            fetch_format = product_conf["fetch_metadata"]["fetch_format"]
            update_metadata = product_conf["fetch_metadata"]["update_metadata"]
            fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
                **product.properties)
            if fetch_format == "json":
                logger.info("Fetching extra metadata from %s", fetch_url)
                resp = requests.get(fetch_url)
                json_resp = resp.json()
                update_metadata = mtd_cfg_as_jsonpath(update_metadata)
                update_metadata = properties_from_json(json_resp,
                                                       update_metadata)
                product.properties.update(update_metadata)
            else:
                logger.warning(
                    "SAFE metadata fetch format %s not implemented",
                    fetch_format)
        # if assets are defined, use them instead of scanning product.location
        if hasattr(product, "assets"):
            bucket_names_and_prefixes = []
            for complementary_url in product.assets.values():
                bucket_names_and_prefixes.append(
                    self.get_bucket_name_and_prefix(
                        product, complementary_url.get("href", "")))
        else:
            bucket_names_and_prefixes = [
                self.get_bucket_name_and_prefix(product)
            ]

        # add complementary urls
        for complementary_url_key in product_conf.get("complementary_url_key",
                                                      []):
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, product.properties[complementary_url_key]))

        # prepare download & create dirs
        product_local_path, record_filename = self._prepare_download(
            product, **kwargs)
        if not product_local_path or not record_filename:
            return product_local_path
        product_local_path = product_local_path.replace(".zip", "")
        # remove existing incomplete file
        if os.path.isfile(product_local_path):
            os.remove(product_local_path)
        # create product dest dir
        if not os.path.isdir(product_local_path):
            os.makedirs(product_local_path)

        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1

        # authenticate & get product size
        authenticated_objects = {}
        total_size = 0
        auth_error_messages = set()
        for idx, pack in enumerate(bucket_names_and_prefixes):
            try:
                bucket_name, prefix = pack
                if bucket_name not in authenticated_objects:
                    # get Prefixes longest common base path
                    common_prefix = ""
                    prefix_split = prefix.split("/")
                    prefixes_in_bucket = len([
                        p for b, p in bucket_names_and_prefixes
                        if b == bucket_name
                    ])
                    for i in range(1, len(prefix_split)):
                        common_prefix = "/".join(prefix_split[0:i])
                        if (len([
                                p for b, p in bucket_names_and_prefixes
                                if b == bucket_name and common_prefix in p
                        ]) < prefixes_in_bucket):
                            common_prefix = "/".join(prefix_split[0:i - 1])
                            break
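                    # e.g. with prefixes "a/b/c/1" and "a/b/d/2" in the same
                    # bucket, the loop above stops at common_prefix == "a/b"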
                    # connect to aws s3 and get bucket authenticated objects
                    s3_objects = self.get_authenticated_objects(
                        bucket_name, common_prefix, auth)
                    authenticated_objects[bucket_name] = s3_objects
                else:
                    s3_objects = authenticated_objects[bucket_name]

                total_size += sum(
                    p.size for p in s3_objects.filter(Prefix=prefix))

            except AuthenticationError as e:
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
                auth_error_messages.add(str(e))
            except ClientError as e:
                err = e.response["Error"]
                auth_messages = [
                    "AccessDenied",
                    "InvalidAccessKeyId",
                    "SignatureDoesNotMatch",
                ]
                if err["Code"] in auth_messages and "key" in err[
                        "Message"].lower():
                    raise AuthenticationError(
                        "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}"
                        .format(
                            e.response["ResponseMetadata"]["HTTPStatusCode"],
                            err["Code"],
                            err["Message"],
                            self.provider,
                        ))
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
                auth_error_messages.add(str(e))

        # could not auth on any bucket
        if not authenticated_objects:
            raise AuthenticationError(", ".join(auth_error_messages))

        # bucket_names_and_prefixes with unauthenticated items filtered out
        auth_bucket_names_and_prefixes = [
            p for p in bucket_names_and_prefixes
            if p[0] in authenticated_objects.keys()
        ]

        # download
        progress_callback.max_size = total_size
        progress_callback.reset()
        for bucket_name, prefix in auth_bucket_names_and_prefixes:
            try:
                s3_objects = authenticated_objects[bucket_name]

                for product_chunk in s3_objects.filter(Prefix=prefix):
                    chunck_rel_path = self.get_chunck_dest_path(
                        product,
                        product_chunk,
                        build_safe=build_safe,
                        dir_prefix=prefix,
                    )
                    chunck_abs_path = os.path.join(product_local_path,
                                                   chunck_rel_path)
                    chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                    if not os.path.isdir(chunck_abs_path_dir):
                        os.makedirs(chunck_abs_path_dir)

                    if not os.path.isfile(chunck_abs_path):
                        product_chunk.Bucket().download_file(
                            product_chunk.key,
                            chunck_abs_path,
                            ExtraArgs=getattr(s3_objects, "_params", {}),
                            Callback=progress_callback,
                        )

            except AuthenticationError as e:
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
            except ClientError as e:
                err = e.response["Error"]
                auth_messages = [
                    "AccessDenied",
                    "InvalidAccessKeyId",
                    "SignatureDoesNotMatch",
                ]
                if err["Code"] in auth_messages and "key" in err[
                        "Message"].lower():
                    raise AuthenticationError(
                        "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}"
                        .format(
                            e.response["ResponseMetadata"]["HTTPStatusCode"],
                            err["Code"],
                            err["Message"],
                            self.provider,
                        ))
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))

        # finalize safe product
        if build_safe and "S2_MSI" in product.product_type:
            self.finalize_s2_safe_product(product_local_path)
        # flatten directory structure
        elif flatten_top_dirs:
            tmp_product_local_path = "%s-tmp" % product_local_path
            for d, dirs, files in os.walk(product_local_path):
                if files:
                    shutil.copytree(d, tmp_product_local_path)
                    shutil.rmtree(product_local_path)
                    os.rename(tmp_product_local_path, product_local_path)
                    break

        # save hash/record file
        with open(record_filename, "w") as fh:
            fh.write(product.remote_location)
        logger.debug("Download recorded in %s", record_filename)

        return product_local_path
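
The flattening step above promotes the first directory level that actually contains files to the product root. A standalone sketch of the same walk-copy-rename sequence, using hypothetical paths:

import os
import shutil
import tempfile

root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "wrapper", "GRANULE"))
open(os.path.join(root, "wrapper", "GRANULE", "band.jp2"), "w").close()

tmp = root + "-tmp"
for d, dirs, files in os.walk(root):
    if files:  # first level that actually holds files
        shutil.copytree(d, tmp)
        shutil.rmtree(root)
        os.rename(tmp, root)
        break

print(os.listdir(root))  # ['band.jp2'] -- the wrapper levels are gone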
Example #4
    def query(self,
              product_type=None,
              items_per_page=None,
              page=None,
              count=True,
              **kwargs):
        """Search for data on USGS catalogues

        .. versionchanged::
            2.2.0

                * Based on usgs library v0.3.0 which now uses M2M API. The library
                  is used for both search & download

        .. versionchanged::
            1.0

                * ``product_type`` is no longer mandatory
        """
        product_type = kwargs.get("productType")
        if product_type is None:
            return [], 0
        try:
            api.login(
                self.config.credentials["username"],
                self.config.credentials["password"],
                save=True,
            )
        except USGSError:
            raise AuthenticationError(
                "Please check your USGS credentials.") from None

        product_type_def_params = self.config.products.get(
            product_type, self.config.products[GENERIC_PRODUCT_TYPE])
        usgs_dataset = format_dict_items(product_type_def_params,
                                         **kwargs)["dataset"]
        start_date = kwargs.pop("startTimeFromAscendingNode", None)
        end_date = kwargs.pop("completionTimeFromAscendingNode", None)
        geom = kwargs.pop("geometry", None)
        footprint = {}
        if hasattr(geom, "bounds"):
            (
                footprint["lonmin"],
                footprint["latmin"],
                footprint["lonmax"],
                footprint["latmax"],
            ) = geom.bounds
        else:
            footprint = geom

        final = []
        if footprint and len(footprint) == 4:  # a rectangle (or bbox)
            lower_left = {
                "longitude": footprint["lonmin"],
                "latitude": footprint["latmin"],
            }
            upper_right = {
                "longitude": footprint["lonmax"],
                "latitude": footprint["latmax"],
            }
        else:
            lower_left, upper_right = None, None
        try:
            results = api.scene_search(
                usgs_dataset,
                start_date=start_date,
                end_date=end_date,
                ll=lower_left,
                ur=upper_right,
                max_results=items_per_page,
                starting_number=(1 + (page - 1) * items_per_page),
            )
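            # e.g. page=2 and items_per_page=50 give starting_number=51:
            # result numbering is 1-based on the USGS side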

            # Same method as in base.py, Search.__init__()
            # Prepare the metadata mapping
            # Do a shallow copy, the structure is flat enough for this to be sufficient
            metas = DEFAULT_METADATA_MAPPING.copy()
            # Update the defaults with the provider mapping. This adds any new key
            # from the provider mapping that is not in the default metadata.
            # A deepcopy prevents self.config.metadata_mapping from being modified
            # when metas[metadata] is a list that is later mutated
            metas.update(copy.deepcopy(self.config.metadata_mapping))
            metas = mtd_cfg_as_jsonpath(metas)

            for result in results["data"]["results"]:

                result["productType"] = usgs_dataset

                product_properties = properties_from_json(result, metas)

                final.append(
                    EOProduct(
                        productType=product_type,
                        provider=self.provider,
                        properties=product_properties,
                        geometry=footprint,
                    ))
        except USGSError as e:
            logger.warning(
                "Product type %s does not exist on USGS EE catalog",
                usgs_dataset,
            )
            logger.warning("Skipping error: %s", e)
        api.logout()

        if final:
            # parse total_results
            path_parsed = parse(
                self.config.pagination["total_items_nb_key_path"])
            total_results = path_parsed.find(results["data"])[0].value
        else:
            total_results = 0

        return final, total_results
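
The total-results count is extracted with a JSONPath expression taken from the provider's pagination settings. A minimal sketch, assuming ``parse`` comes from jsonpath-ng and a hypothetical ``total_items_nb_key_path`` of ``$.totalHits``:

from jsonpath_ng import parse

data = {"totalHits": 42, "results": []}
total_results = parse("$.totalHits").find(data)[0].value
print(total_results)  # 42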
Example #5
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download method for AWS S3 API.

        :param product: The EO product to download
        :type product: :class:`~eodag.api.product.EOProduct`
        :param auth: (optional) The configuration of a plugin of type Authentication
        :type auth: :class:`~eodag.config.PluginConfig`
        :param progress_callback: (optional) A method or a callable object
                                  which takes a current size and a maximum
                                  size as inputs and handles progress bar
                                  creation and update to give the user
                                  feedback on the download progress
        :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
        :return: The absolute path to the downloaded product in the local filesystem
        :rtype: str
        """
        product_conf = getattr(self.config, "products",
                               {}).get(product.product_type, {})

        build_safe = product_conf.get("build_safe", False)

        # product conf overrides provider conf for "flatten_top_dirs"
        flatten_top_dirs = product_conf.get(
            "flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
                                        False))

        # extra metadata needed for SAFE product
        if build_safe and "fetch_metadata" in product_conf:
            fetch_format = product_conf["fetch_metadata"]["fetch_format"]
            update_metadata = product_conf["fetch_metadata"]["update_metadata"]
            fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
                **product.properties)
            if fetch_format == "json":
                logger.info("Fetching extra metadata from %s", fetch_url)
                resp = requests.get(fetch_url)
                json_resp = resp.json()
                update_metadata = mtd_cfg_as_jsonpath(update_metadata)
                update_metadata = properties_from_json(json_resp,
                                                       update_metadata)
                product.properties.update(update_metadata)
            else:
                logger.warning(
                    "SAFE metadata fetch format %s not implemented",
                    fetch_format)
        # if assets are defined, use them instead of scanning product.location
        if hasattr(product, "assets"):
            bucket_names_and_prefixes = []
            for complementary_url in product.assets.values():
                bucket_names_and_prefixes.append(
                    self.get_bucket_name_and_prefix(
                        product, complementary_url.get("href", "")))
        else:
            bucket_names_and_prefixes = [
                self.get_bucket_name_and_prefix(product)
            ]

        # add complementary urls
        for complementary_url_key in product_conf.get("complementary_url_key",
                                                      []):
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, product.properties[complementary_url_key]))

        # prepare download & create dirs
        product_local_path, record_filename = self._prepare_download(
            product, **kwargs)
        if not product_local_path or not record_filename:
            return product_local_path
        product_local_path = product_local_path.replace(".zip", "")
        # remove existing incomplete file
        if os.path.isfile(product_local_path):
            os.remove(product_local_path)
        # create product dest dir
        if not os.path.isdir(product_local_path):
            os.makedirs(product_local_path)

        # fall back to a default progress callback when none is provided
        if progress_callback is None:
            progress_callback = get_progress_callback()

        with tqdm(
                total=len(bucket_names_and_prefixes),
                unit="parts",
                desc="Downloading product parts",
        ) as bar:

            for bucket_name, prefix in bucket_names_and_prefixes:
                try:
                    # connect to aws s3
                    access_key, access_secret = auth
                    s3 = boto3.resource(
                        "s3",
                        aws_access_key_id=access_key,
                        aws_secret_access_key=access_secret,
                    )
                    bucket = s3.Bucket(bucket_name)

                    total_size = sum(
                        p.size for p in bucket.objects.filter(
                            Prefix=prefix, RequestPayer="requester"))
                    progress_callback.max_size = total_size
                    for product_chunk in bucket.objects.filter(
                            Prefix=prefix, RequestPayer="requester"):
                        chunck_rel_path = self.get_chunck_dest_path(
                            product,
                            product_chunk,
                            build_safe=build_safe,
                            dir_prefix=prefix,
                        )
                        chunck_abs_path = os.path.join(product_local_path,
                                                       chunck_rel_path)
                        chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                        if not os.path.isdir(chunck_abs_path_dir):
                            os.makedirs(chunck_abs_path_dir)

                        if not os.path.isfile(chunck_abs_path):
                            bucket.download_file(
                                product_chunk.key,
                                chunck_abs_path,
                                ExtraArgs={"RequestPayer": "requester"},
                                Callback=progress_callback,
                            )
                except ClientError as e:
                    err = e.response["Error"]
                    auth_messages = [
                        "InvalidAccessKeyId", "SignatureDoesNotMatch"
                    ]
                    if err["Code"] in auth_messages and "key" in err[
                            "Message"].lower():
                        raise AuthenticationError(
                            "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}"
                            .format(
                                e.response["ResponseMetadata"]
                                ["HTTPStatusCode"],
                                err["Code"],
                                err["Message"],
                                self.provider,
                            ))
                    logger.warning("Unexpected error: %s" % e)
                    logger.warning("Skipping %s/%s" % (bucket_name, prefix))
                bar.update(1)

        # finalize safe product
        if build_safe and "S2_MSI" in product.product_type:
            self.finalize_s2_safe_product(product_local_path)
        # flatten directory structure
        elif flatten_top_dirs:
            tmp_product_local_path = "%s-tmp" % product_local_path
            for d, dirs, files in os.walk(product_local_path):
                if files:
                    shutil.copytree(d, tmp_product_local_path)
                    shutil.rmtree(product_local_path)
                    os.rename(tmp_product_local_path, product_local_path)
                    break

        # save hash/record file
        with open(record_filename, "w") as fh:
            fh.write(product.remote_location)
        logger.debug("Download recorded in %s", record_filename)

        return product_local_path
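
Every S3 listing and download above passes ``RequestPayer="requester"``, which is required for requester-pays buckets such as the public Sentinel-2 archive. A minimal sketch of that access pattern (bucket and prefix are illustrative; credentials are resolved through boto3's default chain here instead of an explicit ``auth`` tuple):

import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket("sentinel-s2-l1c")  # a known requester-pays bucket
for obj in bucket.objects.filter(Prefix="tiles/31/T/CJ/",
                                 RequestPayer="requester"):
    print(obj.key, obj.size)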