Example #1
    def extract(self, product_info: dict) -> str:
        """
        Extract products if needed.

        :param product_info: Product info
        :return: Path (archive or extracted according to the config)
        """
        # Extract them if needed
        if self.config.extract and product_info["path"].endswith(".zip"):
            logger.info("Extraction activated")
            with zipfile.ZipFile(product_info["path"], "r") as zfile:
                fileinfos = zfile.infolist()
                with get_progress_callback() as bar:
                    bar.max_size = len(fileinfos)
                    bar.unit = "file"
                    bar.desc = "Extracting files from {}".format(
                        product_info["path"])
                    bar.unit_scale = False
                    bar.position = 2
                    for fileinfo in fileinfos:
                        zfile.extract(fileinfo,
                                      path=self.config.outputs_prefix)
                        bar(1)
            return product_info["path"][:product_info["path"].index(".zip")]
        else:
            return product_info["path"]
Example #2
    def download(
        self,
        product,
        auth=None,
        progress_callback=None,
        wait=DEFAULT_DOWNLOAD_WAIT,
        timeout=DEFAULT_DOWNLOAD_TIMEOUT,
        **kwargs
    ):
        """Download a product using HTTP protocol.

        The downloaded product is assumed to be a zip file. If it is not,
        the user is warned, the file is renamed to drop the zip extension,
        and no further processing is done (no extraction).
        """
        fs_path, record_filename = self._prepare_download(product, **kwargs)
        if not fs_path or not record_filename:
            return fs_path

        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1

        # download assets, if they exist, instead of the remote_location
        try:
            return self._download_assets(
                product,
                fs_path.replace(".zip", ""),
                record_filename,
                auth,
                progress_callback,
                **kwargs
            )
        except NotAvailableError:
            pass

        url = product.remote_location

        # order product if it is offline
        ordered_message = ""
        if (
            "orderLink" in product.properties
            and "storageStatus" in product.properties
            and product.properties["storageStatus"] == OFFLINE_STATUS
        ):
            order_method = getattr(self.config, "order_method", "GET")
            with requests.request(
                method=order_method,
                url=product.properties["orderLink"],
                auth=auth,
                headers=getattr(self.config, "order_headers", {}),
            ) as response:
                try:
                    response.raise_for_status()
                    ordered_message = response.text
                    logger.debug(ordered_message)
                except HTTPError as e:
                    logger.warning(
                        "%s could not be ordered, request returned %s",
                        product.properties["title"],
                        e,
                    )

        # initiate retry loop
        start_time = datetime.now()
        stop_time = datetime.now() + timedelta(minutes=timeout)
        product.next_try = start_time
        retry_count = 0
        not_available_info = "The product could not be downloaded"
        # another output for notebooks
        nb_info = NotebookWidgets()

        while "Loop until products download succeeds or timeout is reached":

            if datetime.now() >= product.next_try:
                product.next_try += timedelta(minutes=wait)
                try:
                    params = kwargs.pop("dl_url_params", None) or getattr(
                        self.config, "dl_url_params", {}
                    )
                    with requests.get(
                        url,
                        stream=True,
                        auth=auth,
                        params=params,
                    ) as stream:
                        try:
                            stream.raise_for_status()
                        except HTTPError as e:
                            # check if error is identified as auth_error in provider conf
                            auth_errors = getattr(
                                self.config, "auth_error_code", [None]
                            )
                            if not isinstance(auth_errors, list):
                                auth_errors = [auth_errors]
                            if e.response.status_code in auth_errors:
                                raise AuthenticationError(
                                    "HTTP Error %s returned, %s\nPlease check your credentials for %s"
                                    % (
                                        e.response.status_code,
                                        e.response.text.strip(),
                                        self.provider,
                                    )
                                )
                            # product not available
                            elif (
                                product.properties.get("storageStatus", ONLINE_STATUS)
                                != ONLINE_STATUS
                            ):
                                msg = (
                                    ordered_message
                                    if ordered_message and not e.response.text
                                    else e.response.text
                                )
                                raise NotAvailableError(
                                    "%s(initially %s) requested, returned: %s"
                                    % (
                                        product.properties["title"],
                                        product.properties["storageStatus"],
                                        msg,
                                    )
                                )
                            else:
                                import traceback as tb

                                logger.error(
                                    "Error while getting resource :\n%s",
                                    tb.format_exc(),
                                )
                        else:
                            stream_size = int(stream.headers.get("content-length", 0))
                            if (
                                stream_size == 0
                                and "storageStatus" in product.properties
                                and product.properties["storageStatus"] != ONLINE_STATUS
                            ):
                                raise NotAvailableError(
                                    "%s(initially %s) ordered, got: %s"
                                    % (
                                        product.properties["title"],
                                        product.properties["storageStatus"],
                                        stream.reason,
                                    )
                                )
                            progress_callback.max_size = stream_size
                            progress_callback.reset()
                            with open(fs_path, "wb") as fhandle:
                                for chunk in stream.iter_content(chunk_size=64 * 1024):
                                    if chunk:
                                        fhandle.write(chunk)
                                        progress_callback(len(chunk), stream_size)

                            with open(record_filename, "w") as fh:
                                fh.write(url)
                            logger.debug("Download recorded in %s", record_filename)

                            # Check that the downloaded file is really a zip file
                            if not zipfile.is_zipfile(fs_path):
                                logger.warning(
                                    "Downloaded product is not a Zip File. Please check its file type before using it"
                                )
                                new_fs_path = fs_path[: fs_path.index(".zip")]
                                shutil.move(fs_path, new_fs_path)
                                return new_fs_path
                            return self._finalize(fs_path, **kwargs)

                except NotAvailableError as e:
                    if not getattr(self.config, "order_enabled", False):
                        raise NotAvailableError(
                            "Product is not available for download and order is not supported for %s, %s"
                            % (self.provider, e)
                        )
                    not_available_info = e

            if datetime.now() < product.next_try and datetime.now() < stop_time:
                wait_seconds = (product.next_try - datetime.now()).seconds
                retry_count += 1
                retry_info = (
                    "[Retry #%s] Waiting %ss until next download try (retry every %s' for %s')"
                    % (retry_count, wait_seconds, wait, timeout)
                )
                logger.debug(not_available_info)
                # Retry-After info from Response header
                retry_server_info = stream.headers.get("Retry-After", "")
                if retry_server_info:
                    logger.debug(
                        "[%s response] Retry-After: %s"
                        % (self.provider, retry_server_info)
                    )
                logger.info(retry_info)
                nb_info.display_html(retry_info)
                sleep(wait_seconds + 1)
            elif datetime.now() >= stop_time and timeout > 0:
                if "storageStatus" not in product.properties:
                    product.properties["storageStatus"] = "N/A status"
                logger.info(not_available_info)
                raise NotAvailableError(
                    "%s is not available (%s) and could not be downloaded, timeout reached"
                    % (product.properties["title"], product.properties["storageStatus"])
                )
            elif datetime.now() >= stop_time:
                raise NotAvailableError(not_available_info)
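
Both wait and timeout are expressed in minutes: the loop above retries every wait minutes until the product comes online, and gives up once timeout minutes have elapsed. With timeout <= 0 the product is tried once and NotAvailableError is raised immediately on failure; download_all below relies on this by passing timeout=-1 and catching the error itself. A hypothetical call, where plugin, product and auth are assumed to already exist:

# retry every 2 minutes for up to 20 minutes (roughly 10 attempts)
path = plugin.download(
    product,
    auth=auth,
    wait=2,       # minutes between download attempts
    timeout=20,   # minutes before giving up with NotAvailableError
)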
Example #3
    def download_all(
        self,
        products,
        auth=None,
        progress_callback=None,
        wait=DEFAULT_DOWNLOAD_WAIT,
        timeout=DEFAULT_DOWNLOAD_TIMEOUT,
        **kwargs,
    ):
        """
        A sequential download_all implementation
        using download method for every products
        """
        paths = []
        # initiate retry loop
        start_time = datetime.now()
        stop_time = datetime.now() + timedelta(minutes=timeout)
        nb_products = len(products)
        retry_count = 0
        # another output for notebooks
        nb_info = NotebookWidgets()

        for product in products:
            product.next_try = start_time

        with get_progress_callback() as bar:
            bar.max_size = nb_products
            bar.unit = "product"
            bar.desc = "Downloaded products"
            bar.unit_scale = False
            bar(0)

            while "Loop until all products are download or timeout is reached":
                # try downloading each product before retry
                # iterate over a snapshot: products are removed from the list as
                # they are downloaded, and removing from the list being iterated
                # would skip elements (see the illustration after this example)
                for product in list(products):
                    if datetime.now() >= product.next_try:
                        product.next_try += timedelta(minutes=wait)
                        try:
                            if product.downloader is None:
                                raise RuntimeError(
                                    "EO product is unable to download itself due to lacking of a "
                                    "download plugin"
                                )

                            auth = (
                                product.downloader_auth.authenticate()
                                if product.downloader_auth is not None
                                else product.downloader_auth
                            )
                            # resolve remote location if needed with downloader configuration
                            product.remote_location = product.remote_location % vars(
                                product.downloader.config
                            )

                            paths.append(
                                self.download(
                                    product,
                                    auth=auth,
                                    progress_callback=progress_callback,
                                    wait=wait,
                                    timeout=-1,
                                    **kwargs,
                                )
                            )

                            # product downloaded, do not retry it
                            products.remove(product)
                            bar(1)

                            # reset stop time for next product
                            stop_time = datetime.now() + timedelta(minutes=timeout)

                        except NotAvailableError as e:
                            logger.info(e)
                            continue

                        except (AuthenticationError, MisconfiguredError):
                            logger.exception(
                                "Stopped because of credentials problems with provider %s",
                                self.provider,
                            )
                            raise

                        except RuntimeError:
                            import traceback as tb

                            logger.error(
                                "A problem occurred during download of product: %s. "
                                "Skipping it",
                                product,
                            )
                            logger.debug("\n%s", tb.format_exc())
                            stop_time = datetime.now()

                        except Exception:
                            import traceback as tb

                            logger.warning(
                                "A problem occurred during download of product: %s. "
                                "Skipping it",
                                product,
                            )
                            logger.debug("\n%s", tb.format_exc())

                if (
                    len(products) > 0
                    and datetime.now() < products[0].next_try
                    and datetime.now() < stop_time
                ):
                    wait_seconds = (products[0].next_try - datetime.now()).seconds
                    retry_count += 1
                    info_message = (
                        f"[Retry #{retry_count}, {nb_products - len(products)}/{nb_products} D/L] "
                        f"Waiting {wait_seconds}s until next download try (retry every {wait}' for {timeout}')"
                    )
                    logger.info(info_message)
                    nb_info.display_html(info_message)
                    sleep(wait_seconds + 1)
                elif len(products) > 0 and datetime.now() >= stop_time:
                    logger.warning(
                        "%s products could not be downloaded: %s",
                        len(products),
                        [prod.properties["title"] for prod in products],
                    )
                    break
                elif len(products) == 0:
                    break

        if hasattr(progress_callback, "pb") and hasattr(progress_callback.pb, "close"):
            progress_callback.pb.close()

        return paths
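
Note that the for loop above removes items from products as they complete, which is why it iterates over a snapshot (list(products)): removing an element from the list being iterated silently skips the element that follows it. A self-contained illustration of the pitfall:

items = [1, 2, 3, 4]
for x in items:          # iterating the list while mutating it
    if x % 2 == 1:
        items.remove(x)
print(items)             # [2, 4], but 2 and 4 were never visited by the loop

items = [1, 2, 3, 4]
for x in list(items):    # iterate over a snapshot instead
    if x % 2 == 1:
        items.remove(x)
print(items)             # [2, 4], and every element was actually visited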
Example #4
    def _finalize(self, fs_path, **kwargs):
        """Finalize the download process.

        :param fs_path: The path to the local zip archive downloaded or already present
        :type fs_path: str
        :return: the absolute path to the product
        """
        extract = kwargs.pop("extract", None)
        extract = (
            extract if extract is not None else getattr(self.config, "extract", False)
        )
        if not extract:
            logger.info("Extraction not activated. The product is available as is.")
            return fs_path
        product_path = (
            fs_path[: fs_path.index(".zip")] if ".zip" in fs_path else fs_path
        )
        product_path_exists = os.path.exists(product_path)
        if product_path_exists and os.path.isfile(product_path):
            logger.info(
                "Remove existing partially downloaded file: %s (%s/%s)"
                % (
                    product_path,
                    os.stat(product_path).st_size,
                    os.stat(fs_path).st_size,
                )
            )
            os.remove(product_path)
        elif (
            product_path_exists
            and os.path.isdir(product_path)
            and len(os.listdir(product_path)) == 0
        ):
            logger.info(
                "Remove existing empty destination directory: %s" % product_path
            )
            os.rmdir(product_path)
        elif (
            product_path_exists
            and os.path.isdir(product_path)
            and len(os.listdir(product_path)) > 0
        ):
            logger.info(
                "Extraction cancelled, destination directory already exists and is not empty: %s"
                % product_path
            )
            return product_path
        outputs_prefix = (
            kwargs.pop("outputs_prefix", None) or self.config.outputs_prefix
        )
        if not os.path.exists(product_path):
            logger.info("Extraction activated")
            with zipfile.ZipFile(fs_path, "r") as zfile:
                fileinfos = zfile.infolist()
                with get_progress_callback() as bar:
                    bar.max_size = len(fileinfos)
                    bar.unit = "file"
                    bar.desc = "Extracting files from {}".format(
                        os.path.basename(fs_path)
                    )
                    bar.unit_scale = False
                    bar.position = 2
                    for fileinfo in fileinfos:
                        zfile.extract(
                            fileinfo,
                            path=os.path.join(outputs_prefix, product_path),
                        )
                        bar(1)
        # Handle depth levels in the product archive. For example, if the downloaded archive was
        # extracted to /top_level/product_base_dir and archive_depth was configured to 2, the
        # product location will be /top_level/product_base_dir/product_sub_dir (one level down).
        # WARNING: a strong assumption is made here: there is only one subdirectory per level
        archive_depth = getattr(self.config, "archive_depth", 1)
        count = 1
        while count < archive_depth:
            product_path = os.path.join(product_path, os.listdir(product_path)[0])
            count += 1
        return product_path
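
A worked example of the final archive_depth walk, on a throwaway directory layout (the PRODUCT/GRANULE names are illustrative):

import os
import tempfile

# The archive extracts to PRODUCT/, which contains a single GRANULE/
# subdirectory -- the "one subdirectory per level" assumption above.
root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "PRODUCT", "GRANULE"))

product_path = os.path.join(root, "PRODUCT")
archive_depth = 2  # descend archive_depth - 1 levels into the extracted tree
count = 1
while count < archive_depth:
    product_path = os.path.join(product_path, os.listdir(product_path)[0])
    count += 1
print(product_path)  # <root>/PRODUCT/GRANULE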
Example #5
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download data from USGS catalogues"""

        fs_path, record_filename = self._prepare_download(
            product, outputs_extension=".tar.gz", **kwargs)
        if not fs_path or not record_filename:
            return fs_path

        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1

        try:
            api.login(
                self.config.credentials["username"],
                self.config.credentials["password"],
                save=True,
            )
        except USGSError:
            raise AuthenticationError(
                "Please check your USGS credentials.") from None

        download_options = api.download_options(
            product.properties["productType"], product.properties["id"])

        try:
            product_ids = [
                p["id"] for p in download_options["data"]
                if p["downloadSystem"] == "dds"
            ]
        except KeyError as e:
            raise NotAvailableError("%s not found in %s's products" %
                                    (e, product.properties["id"]))

        if not product_ids:
            raise NotAvailableError("No USGS products found for %s" %
                                    product.properties["id"])

        req_urls = []
        for product_id in product_ids:
            download_request = api.download_request(
                product.properties["productType"], product.properties["id"],
                product_id)
            try:
                req_urls.extend([
                    x["url"]
                    for x in download_request["data"]["preparingDownloads"]
                ])
            except KeyError as e:
                raise NotAvailableError("%s not found in %s download_request" %
                                        (e, product.properties["id"]))

        if len(req_urls) > 1:
            logger.warning(
                "%s USGS products found for %s. Only the first one will be downloaded"
                % (len(req_urls), product.properties["id"]))
        elif not req_urls:
            raise NotAvailableError("No usgs request url was found for %s" %
                                    product.properties["id"])

        req_url = req_urls[0]
        progress_callback.reset()
        with requests.get(
                req_url,
                stream=True,
        ) as stream:
            try:
                stream.raise_for_status()
            except HTTPError:
                import traceback as tb

                logger.error(
                    "Error while getting resource :\n%s",
                    tb.format_exc(),
                )
            else:
                stream_size = int(stream.headers.get("content-length", 0))
                progress_callback.max_size = stream_size
                progress_callback.reset()
                with open(fs_path, "wb") as fhandle:
                    for chunk in stream.iter_content(chunk_size=64 * 1024):
                        if chunk:
                            fhandle.write(chunk)
                            progress_callback(len(chunk), stream_size)

        with open(record_filename, "w") as fh:
            fh.write(product.properties["downloadLink"])
        logger.debug("Download recorded in %s", record_filename)

        api.logout()

        # Check that the downloaded file is really a tar file
        if not tarfile.is_tarfile(fs_path):
            logger.warning(
                "Downloaded product is not a tar File. Please check its file type before using it"
            )
            new_fs_path = fs_path[:fs_path.index(".tar.gz")]
            shutil.move(fs_path, new_fs_path)
            return new_fs_path
        return self._finalize(fs_path, outputs_extension=".tar.gz", **kwargs)
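
The method above only relies on a few keys of the usgs API responses. Their shape, as inferred from the dictionary accesses in the code (all values below are placeholders, not real API output):

# inferred shape of api.download_options(...)
download_options = {
    "data": [
        {"id": "placeholder-id-1", "downloadSystem": "dds"},
        {"id": "placeholder-id-2", "downloadSystem": "other"},  # filtered out
    ]
}

# inferred shape of api.download_request(...)
download_request = {
    "data": {
        "preparingDownloads": [
            {"url": "https://example.com/download/placeholder-id-1"},
        ]
    }
}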
Example #6
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download method for AWS S3 API.

        :param product: The EO product to download
        :type product: :class:`~eodag.api.product.EOProduct`
        :param auth: (optional) The configuration of a plugin of type Authentication
        :type auth: :class:`~eodag.config.PluginConfig`
        :param progress_callback: (optional) A method or a callable object
                                  which takes a current size and a maximum
                                  size as inputs and handle progress bar
                                  creation and update to give the user a
                                  feedback on the download progress
        :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
        :return: The absolute path to the downloaded product in the local filesystem
        :rtype: str
        """
        product_conf = getattr(self.config, "products",
                               {}).get(product.product_type, {})

        build_safe = product_conf.get("build_safe", False)

        # product conf overrides provider conf for "flatten_top_dirs"
        flatten_top_dirs = product_conf.get(
            "flatten_top_dirs", getattr(self.config, "flatten_top_dirs",
                                        False))

        # extra metadata needed for SAFE product
        if build_safe and "fetch_metadata" in product_conf.keys():
            fetch_format = product_conf["fetch_metadata"]["fetch_format"]
            update_metadata = product_conf["fetch_metadata"]["update_metadata"]
            fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
                **product.properties)
            if fetch_format == "json":
                logger.info("Fetching extra metadata from %s" % fetch_url)
                resp = requests.get(fetch_url)
                json_resp = resp.json()
                update_metadata = mtd_cfg_as_jsonpath(update_metadata)
                update_metadata = properties_from_json(json_resp,
                                                       update_metadata)
                product.properties.update(update_metadata)
            else:
                logger.warning(
                    "SAFE metadata fetch format %s not implemented" %
                    fetch_format)
        # if assets are defined, use them instead of scanning product.location
        if hasattr(product, "assets"):
            bucket_names_and_prefixes = []
            for complementary_url in getattr(product, "assets", {}).values():
                bucket_names_and_prefixes.append(
                    self.get_bucket_name_and_prefix(
                        product, complementary_url.get("href", "")))
        else:
            bucket_names_and_prefixes = [
                self.get_bucket_name_and_prefix(product)
            ]

        # add complementary urls
        for complementary_url_key in product_conf.get("complementary_url_key",
                                                      []):
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, product.properties[complementary_url_key]))

        # prepare download & create dirs
        product_local_path, record_filename = self._prepare_download(
            product, **kwargs)
        if not product_local_path or not record_filename:
            return product_local_path
        product_local_path = product_local_path.replace(".zip", "")
        # remove existing incomplete file
        if os.path.isfile(product_local_path):
            os.remove(product_local_path)
        # create product dest dir
        if not os.path.isdir(product_local_path):
            os.makedirs(product_local_path)

        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1

        # authenticate & get product size
        authenticated_objects = {}
        total_size = 0
        auth_error_messages = set()
        for idx, pack in enumerate(bucket_names_and_prefixes):
            try:
                bucket_name, prefix = pack
                if bucket_name not in authenticated_objects:
                    # get the prefixes' longest common base path
                    common_prefix = ""
                    prefix_split = prefix.split("/")
                    prefixes_in_bucket = len([
                        p for b, p in bucket_names_and_prefixes
                        if b == bucket_name
                    ])
                    for i in range(1, len(prefix_split)):
                        common_prefix = "/".join(prefix_split[0:i])
                        if (len([
                                p for b, p in bucket_names_and_prefixes
                                if b == bucket_name and common_prefix in p
                        ]) < prefixes_in_bucket):
                            common_prefix = "/".join(prefix_split[0:i - 1])
                            break
                    # connect to AWS S3 and get the bucket's authenticated objects
                    s3_objects = self.get_authenticated_objects(
                        bucket_name, common_prefix, auth)
                    authenticated_objects[bucket_name] = s3_objects
                else:
                    s3_objects = authenticated_objects[bucket_name]

                total_size += sum(
                    [p.size for p in s3_objects.filter(Prefix=prefix)])

            except AuthenticationError as e:
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
                auth_error_messages.add(str(e))
            except ClientError as e:
                err = e.response["Error"]
                auth_messages = [
                    "AccessDenied",
                    "InvalidAccessKeyId",
                    "SignatureDoesNotMatch",
                ]
                if err["Code"] in auth_messages and "key" in err[
                        "Message"].lower():
                    raise AuthenticationError(
                        "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}"
                        .format(
                            e.response["ResponseMetadata"]["HTTPStatusCode"],
                            err["Code"],
                            err["Message"],
                            self.provider,
                        ))
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
                auth_error_messages.add(str(e))

        # could not auth on any bucket
        if not authenticated_objects:
            raise AuthenticationError(", ".join(auth_error_messages))

        # bucket_names_and_prefixes with unauthenticated items filtered out
        auth_bucket_names_and_prefixes = [
            p for p in bucket_names_and_prefixes
            if p[0] in authenticated_objects.keys()
        ]

        # download
        progress_callback.max_size = total_size
        progress_callback.reset()
        for bucket_name, prefix in auth_bucket_names_and_prefixes:
            try:
                s3_objects = authenticated_objects[bucket_name]

                for product_chunk in s3_objects.filter(Prefix=prefix):
                    chunck_rel_path = self.get_chunck_dest_path(
                        product,
                        product_chunk,
                        build_safe=build_safe,
                        dir_prefix=prefix,
                    )
                    chunck_abs_path = os.path.join(product_local_path,
                                                   chunck_rel_path)
                    chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                    if not os.path.isdir(chunck_abs_path_dir):
                        os.makedirs(chunck_abs_path_dir)

                    if not os.path.isfile(chunck_abs_path):
                        product_chunk.Bucket().download_file(
                            product_chunk.key,
                            chunck_abs_path,
                            ExtraArgs=getattr(s3_objects, "_params", {}),
                            Callback=progress_callback,
                        )

            except AuthenticationError as e:
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
            except ClientError as e:
                err = e.response["Error"]
                auth_messages = [
                    "AccessDenied",
                    "InvalidAccessKeyId",
                    "SignatureDoesNotMatch",
                ]
                if err["Code"] in auth_messages and "key" in err[
                        "Message"].lower():
                    raise AuthenticationError(
                        "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}"
                        .format(
                            e.response["ResponseMetadata"]["HTTPStatusCode"],
                            err["Code"],
                            err["Message"],
                            self.provider,
                        ))
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))

        # finalize safe product
        if build_safe and "S2_MSI" in product.product_type:
            self.finalize_s2_safe_product(product_local_path)
        # flatten directory structure
        elif flatten_top_dirs:
            tmp_product_local_path = "%s-tmp" % product_local_path
            for d, dirs, files in os.walk(product_local_path):
                if len(files) != 0:
                    shutil.copytree(d, tmp_product_local_path)
                    shutil.rmtree(product_local_path)
                    os.rename(tmp_product_local_path, product_local_path)
                    break

        # save hash/record file
        with open(record_filename, "w") as fh:
            fh.write(product.remote_location)
        logger.debug("Download recorded in %s", record_filename)

        return product_local_path
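
The least obvious part of the method above is the computation of the longest common base path shared by all prefixes of a bucket, used so that authentication happens once per bucket at the right level. The same logic, extracted into a standalone function with illustrative names:

def common_base_prefix(prefix, bucket_name, bucket_names_and_prefixes):
    """Longest base path of `prefix` shared by all prefixes of the bucket."""
    prefix_split = prefix.split("/")
    prefixes_in_bucket = len(
        [p for b, p in bucket_names_and_prefixes if b == bucket_name]
    )
    common_prefix = ""
    for i in range(1, len(prefix_split)):
        common_prefix = "/".join(prefix_split[0:i])
        # back up one level as soon as a prefix in the bucket stops matching
        if (
            len([
                p for b, p in bucket_names_and_prefixes
                if b == bucket_name and common_prefix in p
            ]) < prefixes_in_bucket
        ):
            common_prefix = "/".join(prefix_split[0:i - 1])
            break
    return common_prefix

pairs = [("bkt", "tiles/31/T/DJ/A/img1"), ("bkt", "tiles/31/T/DK/A/img2")]
print(common_base_prefix("tiles/31/T/DJ/A/img1", "bkt", pairs))  # tiles/31/T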
Example #7
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download method for S3 REST API.

        :param product: The EO product to download
        :type product: :class:`~eodag.api.product.EOProduct`
        :param auth: (optional) The configuration of a plugin of type Authentication
        :type auth: :class:`~eodag.config.PluginConfig`
        :param progress_callback: (optional) A method or a callable object
                                  which takes a current size and a maximum
                                  size as inputs and handle progress bar
                                  creation and update to give the user a
                                  feedback on the download progress
        :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
        :return: The absolute path to the downloaded product in the local filesystem
        :rtype: str
        """
        # get bucket urls
        bucket_name, prefix = self.get_bucket_name_and_prefix(product)

        if (
            bucket_name is None
            and "storageStatus" in product.properties
            and product.properties["storageStatus"] == OFFLINE_STATUS
        ):
            raise NotAvailableError(
                "%s is not available for download on %s (status = %s)"
                % (
                    product.properties["title"],
                    self.provider,
                    product.properties["storageStatus"],
                )
            )

        bucket_url = urljoin(
            product.downloader.config.base_uri.strip("/") + "/", bucket_name
        )
        nodes_list_url = bucket_url + "?prefix=" + prefix.strip("/")

        # get nodes/files list contained in the bucket
        logger.debug("Retrieving product content from %s", nodes_list_url)
        bucket_contents = requests.get(nodes_list_url, auth=auth)
        try:
            bucket_contents.raise_for_status()
        except requests.HTTPError as err:
            # check if error is identified as auth_error in provider conf
            auth_errors = getattr(self.config, "auth_error_code", [None])
            if not isinstance(auth_errors, list):
                auth_errors = [auth_errors]
            if err.response.status_code in auth_errors:
                raise AuthenticationError(
                    "HTTP Error %s returned, %s\nPlease check your credentials for %s"
                    % (
                        err.response.status_code,
                        err.response.text.strip(),
                        self.provider,
                    )
                )
            # other error
            else:
                logger.exception(
                    "Could not get content from %s (provider:%s, plugin:%s)\n%s",
                    nodes_list_url,
                    self.provider,
                    self.__class__.__name__,
                    bucket_contents.text,
                )
                raise RequestError(str(err))
        try:
            xmldoc = minidom.parseString(bucket_contents.text)
        except ExpatError as err:
            logger.exception("Could not parse xml data from %s", bucket_contents)
            raise DownloadError(str(err))
        nodes_xml_list = xmldoc.getElementsByTagName("Contents")

        if len(nodes_xml_list) == 0:
            logger.warning("Could not load any content from %s", nodes_list_url)
        elif len(nodes_xml_list) == 1:
            # single file download
            product.remote_location = urljoin(
                bucket_url.strip("/") + "/", prefix.strip("/")
            )
            return HTTPDownload(self.provider, self.config).download(
                product=product,
                auth=auth,
                progress_callback=progress_callback,
                **kwargs
            )

        # destination product path
        outputs_prefix = kwargs.pop("ouputs_prefix", None) or self.config.outputs_prefix
        abs_outputs_prefix = os.path.abspath(outputs_prefix)
        product_local_path = os.path.join(abs_outputs_prefix, prefix.split("/")[-1])

        # .downloaded cache record directory
        download_records_dir = os.path.join(abs_outputs_prefix, ".downloaded")
        try:
            os.makedirs(download_records_dir)
        except OSError as exc:
            import errno

            if exc.errno != errno.EEXIST:  # Skip error if dir exists
                import traceback as tb

                logger.warning(
                    "Unable to create records directory. Got:\n%s", tb.format_exc()
                )
        # check if product has already been downloaded
        url_hash = hashlib.md5(product.remote_location.encode("utf-8")).hexdigest()
        record_filename = os.path.join(download_records_dir, url_hash)
        if os.path.isfile(record_filename) and os.path.exists(product_local_path):
            return product_local_path
        # Remove the record file if product_local_path is absent (e.g. it was deleted while record wasn't)
        elif os.path.isfile(record_filename):
            logger.debug(
                "Record file found (%s) but not the actual file", record_filename
            )
            logger.debug("Removing record file : %s", record_filename)
            os.remove(record_filename)

        # total size for progress_callback
        total_size = sum(
            [
                int(node.firstChild.nodeValue)
                for node in xmldoc.getElementsByTagName("Size")
            ]
        )
        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1
        progress_callback.max_size = total_size
        progress_callback.reset()

        # download each node key
        for node_xml in nodes_xml_list:
            node_key = node_xml.getElementsByTagName("Key")[0].firstChild.nodeValue
            # As "Key", "Size" and "ETag" (md5 hash) can also be retrieved from node_xml
            node_url = urljoin(bucket_url.strip("/") + "/", node_key.strip("/"))
            # output file location (the first six levels of the node key are dropped)
            local_filename = os.path.join(
                self.config.outputs_prefix, "/".join(node_key.split("/")[6:])
            )
            local_filename_dir = os.path.dirname(os.path.realpath(local_filename))
            if not os.path.isdir(local_filename_dir):
                os.makedirs(local_filename_dir)

            with requests.get(node_url, stream=True, auth=auth) as stream:
                try:
                    stream.raise_for_status()
                except HTTPError:
                    import traceback as tb

                    logger.error("Error while getting resource :\n%s", tb.format_exc())
                else:
                    with open(local_filename, "wb") as fhandle:
                        for chunk in stream.iter_content(chunk_size=64 * 1024):
                            if chunk:
                                fhandle.write(chunk)
                                progress_callback(len(chunk), total_size)

            # TODO: check md5 hash ?

        with open(record_filename, "w") as fh:
            fh.write(product.remote_location)
        logger.debug("Download recorded in %s", record_filename)

        return product_local_path
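
The bucket listing parsed above is a plain S3 ListBucketResult XML document. A minimal, self-contained sketch of the same minidom parsing on a tiny hand-written listing (real listings carry more fields per Contents node):

from xml.dom import minidom

listing = """<ListBucketResult>
  <Contents><Key>prod/a/file1</Key><Size>10</Size></Contents>
  <Contents><Key>prod/a/file2</Key><Size>20</Size></Contents>
</ListBucketResult>"""

xmldoc = minidom.parseString(listing)
nodes_xml_list = xmldoc.getElementsByTagName("Contents")
keys = [
    node.getElementsByTagName("Key")[0].firstChild.nodeValue
    for node in nodes_xml_list
]
total_size = sum(
    int(node.firstChild.nodeValue)
    for node in xmldoc.getElementsByTagName("Size")
)
print(keys, total_size)  # ['prod/a/file1', 'prod/a/file2'] 30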