def __init__(self, provider, config):
    super(Search, self).__init__(provider, config)
    # Prepare the metadata mapping
    # Do a shallow copy, the structure is flat enough for this to be sufficient
    metas = DEFAULT_METADATA_MAPPING.copy()
    # Update the defaults with the mapping value. This will add any new key
    # added by the provider mapping that is not in the default metadata
    metas.update(self.config.metadata_mapping)
    self.config.metadata_mapping = mtd_cfg_as_jsonpath(
        metas, self.config.metadata_mapping
    )
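
# A minimal sketch (hypothetical keys, not the provider's real mapping) of how the
# merge above behaves: provider-specific entries extend and override the defaults,
# while untouched default keys are preserved.
def _metadata_mapping_merge_example():
    default_mapping = {"id": "$.entityId", "title": "$.displayId"}
    provider_mapping = {"title": "$.summary", "cloudCover": "$.cloudCover"}
    merged = default_mapping.copy()   # shallow copy, as in __init__ above
    merged.update(provider_mapping)   # provider keys win, new keys are added
    # merged == {"id": "$.entityId", "title": "$.summary", "cloudCover": "$.cloudCover"}
    return merged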
def query(self, product_type=None, **kwargs):
    """Search for data on USGS catalogues

    .. versionchanged:: 1.0

       * ``product_type`` is no longer mandatory
    """
    product_type = kwargs.get("productType")
    if product_type is None:
        return [], 0
    api.login(
        self.config.credentials["username"],
        self.config.credentials["password"],
        save=True,
    )
    usgs_dataset = self.config.products[product_type]["dataset"]
    usgs_catalog_node = self.config.products[product_type]["catalog_node"]
    start_date = kwargs.pop("startTimeFromAscendingNode", None)
    end_date = kwargs.pop("completionTimeFromAscendingNode", None)
    footprint = kwargs.pop("geometry", None)

    # Configuration to generate the download url of search results
    result_summary_pattern = re.compile(
        r"^ID: .+, Acquisition Date: .+, Path: (?P<path>\d+), Row: (?P<row>\d+)$"  # noqa
    )
    # See https://pyformat.info/, in the section "Padding and aligning strings", to
    # understand {path:0>3} and {row:0>3}.
    # It roughly means: 'if the string that will be passed as "path" has length < 3,
    # prepend as many "0"s as needed to reach length 3', and same for "row"
    dl_url_pattern = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz"

    final = []
    if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
        lower_left = {
            "longitude": footprint["lonmin"],
            "latitude": footprint["latmin"],
        }
        upper_right = {
            "longitude": footprint["lonmax"],
            "latitude": footprint["latmax"],
        }
    else:
        lower_left, upper_right = None, None
    try:
        results = api.search(
            usgs_dataset,
            usgs_catalog_node,
            start_date=start_date,
            end_date=end_date,
            ll=lower_left,
            ur=upper_right,
        )

        for result in results["data"]["results"]:
            r_lower_left = result["spatialFootprint"]["coordinates"][0][0]
            r_upper_right = result["spatialFootprint"]["coordinates"][0][2]
            summary_match = result_summary_pattern.match(
                result["summary"]
            ).groupdict()
            result["geometry"] = geometry.box(
                r_lower_left[0], r_lower_left[1], r_upper_right[0], r_upper_right[1]
            )

            # Same method as in base.py, Search.__init__()
            # Prepare the metadata mapping
            # Do a shallow copy, the structure is flat enough for this to be sufficient
            metas = DEFAULT_METADATA_MAPPING.copy()
            # Update the defaults with the mapping value. This will add any new key
            # added by the provider mapping that is not in the default metadata.
            # A deepcopy is done to prevent self.config.metadata_mapping from being
            # modified when metas[metadata] is a list and is modified
            metas.update(copy.deepcopy(self.config.metadata_mapping))
            metas = mtd_cfg_as_jsonpath(metas)

            result["productType"] = usgs_dataset

            product_properties = properties_from_json(result, metas)

            if getattr(self.config, "product_location_scheme", "https") == "file":
                product_properties["downloadLink"] = dl_url_pattern.format(
                    base_url="file://"
                )
            else:
                product_properties["downloadLink"] = dl_url_pattern.format(
                    base_url=self.config.google_base_url.rstrip("/"),
                    entity=result["entityId"],
                    **summary_match
                )

            final.append(
                EOProduct(
                    productType=product_type,
                    provider=self.provider,
                    properties=product_properties,
                    geometry=footprint,
                )
            )
    except USGSError as e:
        logger.debug(
            "Product type %s does not exist on catalogue %s",
            usgs_dataset,
            usgs_catalog_node,
        )
        logger.debug("Skipping error: %s", e)
    api.logout()
    return final, len(final)
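
# Illustration of the zero-padded formatting used by ``dl_url_pattern`` above
# (the base URL, path, row and entity values below are made up for the example):
def _dl_url_padding_example():
    pattern = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz"
    return pattern.format(
        base_url="https://example.com", path="7", row="42", entity="LC08_XXXX"
    )
    # -> "https://example.com/L8/007/042/LC08_XXXX.tar.bz"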
def download(self, product, auth=None, progress_callback=None, **kwargs):
    """Download method for AWS S3 API.

    :param product: The EO product to download
    :type product: :class:`~eodag.api.product.EOProduct`
    :param auth: (optional) The configuration of a plugin of type Authentication
    :type auth: :class:`~eodag.config.PluginConfig`
    :param progress_callback: (optional) A method or a callable object
                              which takes a current size and a maximum size
                              as inputs and handles progress bar creation and
                              update to give the user feedback on the
                              download progress
    :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
    :return: The absolute path to the downloaded product in the local filesystem
    :rtype: str or unicode
    """
    product_conf = self.config.products.get(product.product_type, {})
    build_safe = product_conf.get("build_safe", False)

    # extra metadata needed for SAFE product
    if build_safe and "fetch_metadata" in product_conf.keys():
        fetch_format = product_conf["fetch_metadata"]["fetch_format"]
        update_metadata = product_conf["fetch_metadata"]["update_metadata"]
        fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
            **product.properties
        )
        if fetch_format == "json":
            logger.info("Fetching extra metadata from %s" % fetch_url)
            resp = requests.get(fetch_url)
            json_resp = resp.json()
            update_metadata = mtd_cfg_as_jsonpath(update_metadata)
            update_metadata = properties_from_json(json_resp, update_metadata)
            product.properties.update(update_metadata)
        else:
            logger.warning(
                "SAFE metadata fetch format %s not implemented" % fetch_format
            )

    bucket_names_and_prefixes = [self.get_bucket_name_and_prefix(product)]

    # add complementary urls
    for complementary_url_key in product_conf.get("complementary_url_key", []):
        bucket_names_and_prefixes.append(
            self.get_bucket_name_and_prefix(
                product, product.properties[complementary_url_key]
            )
        )

    # prepare download & create dirs
    product_local_path, record_filename = self._prepare_download(product)
    if not product_local_path or not record_filename:
        return product_local_path
    product_local_path = product_local_path.replace(".zip", "")
    # remove existing incomplete file
    if os.path.isfile(product_local_path):
        os.remove(product_local_path)
    # create product dest dir
    if not os.path.isdir(product_local_path):
        os.makedirs(product_local_path)

    with tqdm(
        total=len(bucket_names_and_prefixes),
        unit="parts",
        desc="Downloading product parts",
    ) as bar:
        for bucket_name, prefix in bucket_names_and_prefixes:
            # connect to aws s3
            access_key, access_secret = auth
            s3 = boto3.resource(
                "s3",
                aws_access_key_id=access_key,
                aws_secret_access_key=access_secret,
            )
            bucket = s3.Bucket(bucket_name)

            total_size = sum(
                [
                    p.size
                    for p in bucket.objects.filter(
                        Prefix=prefix, RequestPayer="requester"
                    )
                ]
            )
            progress_callback.max_size = total_size
            for product_chunk in bucket.objects.filter(
                Prefix=prefix, RequestPayer="requester"
            ):
                chunck_rel_path = self.get_chunck_dest_path(
                    product, product_chunk, build_safe=build_safe, dir_prefix=prefix
                )
                chunck_abs_path = os.path.join(product_local_path, chunck_rel_path)
                chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                if not os.path.isdir(chunck_abs_path_dir):
                    os.makedirs(chunck_abs_path_dir)

                if not os.path.isfile(chunck_abs_path):
                    bucket.download_file(
                        product_chunk.key,
                        chunck_abs_path,
                        ExtraArgs={"RequestPayer": "requester"},
                        Callback=progress_callback,
                    )
            bar.update(1)

    # finalize safe product
    if build_safe and "S2_MSI" in product.product_type:
        self.finalize_s2_safe_product(product_local_path)

    # save hash/record file
    with open(record_filename, "w") as fh:
        fh.write(product.remote_location)
    logger.debug("Download recorded in %s", record_filename)

    return product_local_path
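
# Standalone sketch of the size computation used in the download loop above for a
# requester-pays bucket. Bucket name, prefix and credentials are placeholders, not
# values used by the plugin itself.
def _requester_pays_size_example(
    access_key, access_secret, bucket_name="example-bucket", prefix="tiles/10/S/DG/"
):
    import boto3

    s3 = boto3.resource(
        "s3",
        aws_access_key_id=access_key,
        aws_secret_access_key=access_secret,
    )
    bucket = s3.Bucket(bucket_name)
    # every listed object exposes its size in bytes; the requester pays for the listing
    return sum(
        obj.size
        for obj in bucket.objects.filter(Prefix=prefix, RequestPayer="requester")
    )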
def query(self, product_type=None, items_per_page=None, page=None, count=True, **kwargs):
    """Search for data on USGS catalogues

    .. versionchanged:: 2.2.0

       * Based on usgs library v0.3.0 which now uses M2M API. The library
         is used for both search & download

    .. versionchanged:: 1.0

       * ``product_type`` is no longer mandatory
    """
    product_type = kwargs.get("productType")
    if product_type is None:
        return [], 0
    try:
        api.login(
            self.config.credentials["username"],
            self.config.credentials["password"],
            save=True,
        )
    except USGSError:
        raise AuthenticationError("Please check your USGS credentials.") from None

    product_type_def_params = self.config.products.get(
        product_type, self.config.products[GENERIC_PRODUCT_TYPE]
    )
    usgs_dataset = format_dict_items(product_type_def_params, **kwargs)["dataset"]
    start_date = kwargs.pop("startTimeFromAscendingNode", None)
    end_date = kwargs.pop("completionTimeFromAscendingNode", None)
    geom = kwargs.pop("geometry", None)
    footprint = {}
    if hasattr(geom, "bounds"):
        (
            footprint["lonmin"],
            footprint["latmin"],
            footprint["lonmax"],
            footprint["latmax"],
        ) = geom.bounds
    else:
        footprint = geom

    final = []
    if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
        lower_left = {
            "longitude": footprint["lonmin"],
            "latitude": footprint["latmin"],
        }
        upper_right = {
            "longitude": footprint["lonmax"],
            "latitude": footprint["latmax"],
        }
    else:
        lower_left, upper_right = None, None
    try:
        results = api.scene_search(
            usgs_dataset,
            start_date=start_date,
            end_date=end_date,
            ll=lower_left,
            ur=upper_right,
            max_results=items_per_page,
            starting_number=(1 + (page - 1) * items_per_page),
        )

        # Same method as in base.py, Search.__init__()
        # Prepare the metadata mapping
        # Do a shallow copy, the structure is flat enough for this to be sufficient
        metas = DEFAULT_METADATA_MAPPING.copy()
        # Update the defaults with the mapping value. This will add any new key
        # added by the provider mapping that is not in the default metadata.
        # A deepcopy is done to prevent self.config.metadata_mapping from being
        # modified when metas[metadata] is a list and is modified
        metas.update(copy.deepcopy(self.config.metadata_mapping))
        metas = mtd_cfg_as_jsonpath(metas)

        for result in results["data"]["results"]:
            result["productType"] = usgs_dataset

            product_properties = properties_from_json(result, metas)

            final.append(
                EOProduct(
                    productType=product_type,
                    provider=self.provider,
                    properties=product_properties,
                    geometry=footprint,
                )
            )
    except USGSError as e:
        logger.warning(
            "Product type %s does not exist on USGS EE catalog",
            usgs_dataset,
        )
        logger.warning("Skipping error: %s", e)

    api.logout()

    if final:
        # parse total_results
        path_parsed = parse(self.config.pagination["total_items_nb_key_path"])
        total_results = path_parsed.find(results["data"])[0].value
    else:
        total_results = 0

    return final, total_results
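
# Small sketch of the geometry-to-footprint conversion and pagination arithmetic
# used above. Any object with a shapely-style ``bounds`` attribute is reduced to a
# lon/lat bounding box; the polygon and page numbers below are illustrative only.
def _geometry_to_footprint_example():
    from shapely import geometry as shp_geometry

    geom = shp_geometry.box(1.0, 43.0, 2.0, 44.0)  # lonmin, latmin, lonmax, latmax
    footprint = dict(zip(("lonmin", "latmin", "lonmax", "latmax"), geom.bounds))
    # footprint == {"lonmin": 1.0, "latmin": 43.0, "lonmax": 2.0, "latmax": 44.0}
    # pagination: page 3 with 20 items per page -> starting_number = 1 + (3 - 1) * 20 = 41
    return footprint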
def download(self, product, auth=None, progress_callback=None, **kwargs):
    """Download method for AWS S3 API.

    :param product: The EO product to download
    :type product: :class:`~eodag.api.product.EOProduct`
    :param auth: (optional) The configuration of a plugin of type Authentication
    :type auth: :class:`~eodag.config.PluginConfig`
    :param progress_callback: (optional) A method or a callable object
                              which takes a current size and a maximum size
                              as inputs and handles progress bar creation and
                              update to give the user feedback on the
                              download progress
    :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
    :return: The absolute path to the downloaded product in the local filesystem
    :rtype: str
    """
    product_conf = getattr(self.config, "products", {}).get(
        product.product_type, {}
    )
    build_safe = product_conf.get("build_safe", False)

    # product conf overrides provider conf for "flatten_top_dirs"
    flatten_top_dirs = product_conf.get(
        "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
    )

    # extra metadata needed for SAFE product
    if build_safe and "fetch_metadata" in product_conf.keys():
        fetch_format = product_conf["fetch_metadata"]["fetch_format"]
        update_metadata = product_conf["fetch_metadata"]["update_metadata"]
        fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
            **product.properties
        )
        if fetch_format == "json":
            logger.info("Fetching extra metadata from %s" % fetch_url)
            resp = requests.get(fetch_url)
            json_resp = resp.json()
            update_metadata = mtd_cfg_as_jsonpath(update_metadata)
            update_metadata = properties_from_json(json_resp, update_metadata)
            product.properties.update(update_metadata)
        else:
            logger.warning(
                "SAFE metadata fetch format %s not implemented" % fetch_format
            )

    # if assets are defined, use them instead of scanning product.location
    if hasattr(product, "assets"):
        bucket_names_and_prefixes = []
        for complementary_url in getattr(product, "assets", {}).values():
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, complementary_url.get("href", "")
                )
            )
    else:
        bucket_names_and_prefixes = [self.get_bucket_name_and_prefix(product)]

    # add complementary urls
    for complementary_url_key in product_conf.get("complementary_url_key", []):
        bucket_names_and_prefixes.append(
            self.get_bucket_name_and_prefix(
                product, product.properties[complementary_url_key]
            )
        )

    # prepare download & create dirs
    product_local_path, record_filename = self._prepare_download(product, **kwargs)
    if not product_local_path or not record_filename:
        return product_local_path
    product_local_path = product_local_path.replace(".zip", "")
    # remove existing incomplete file
    if os.path.isfile(product_local_path):
        os.remove(product_local_path)
    # create product dest dir
    if not os.path.isdir(product_local_path):
        os.makedirs(product_local_path)

    # progress bar init
    if progress_callback is None:
        progress_callback = get_progress_callback()
    progress_callback.desc = product.properties.get("id", "")
    progress_callback.position = 1

    # authenticate & get product size
    authenticated_objects = {}
    total_size = 0
    auth_error_messages = set()
    for idx, pack in enumerate(bucket_names_and_prefixes):
        try:
            bucket_name, prefix = pack
            if bucket_name not in authenticated_objects:
                # get Prefixes longest common base path
                common_prefix = ""
                prefix_split = prefix.split("/")
                prefixes_in_bucket = len(
                    [p for b, p in bucket_names_and_prefixes if b == bucket_name]
                )
                for i in range(1, len(prefix_split)):
                    common_prefix = "/".join(prefix_split[0:i])
                    if (
                        len(
                            [
                                p
                                for b, p in bucket_names_and_prefixes
                                if b == bucket_name and common_prefix in p
                            ]
                        )
                        < prefixes_in_bucket
                    ):
                        common_prefix = "/".join(prefix_split[0 : i - 1])
                        break
                # connect to aws s3 and get bucket authenticated objects
                s3_objects = self.get_authenticated_objects(
                    bucket_name, common_prefix, auth
                )
                authenticated_objects[bucket_name] = s3_objects
            else:
                s3_objects = authenticated_objects[bucket_name]

            total_size += sum([p.size for p in s3_objects.filter(Prefix=prefix)])

        except AuthenticationError as e:
            logger.warning("Unexpected error: %s" % e)
            logger.warning("Skipping %s/%s" % (bucket_name, prefix))
            auth_error_messages.add(str(e))
        except ClientError as e:
            err = e.response["Error"]
            auth_messages = [
                "AccessDenied",
                "InvalidAccessKeyId",
                "SignatureDoesNotMatch",
            ]
            if err["Code"] in auth_messages and "key" in err["Message"].lower():
                raise AuthenticationError(
                    "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}".format(
                        e.response["ResponseMetadata"]["HTTPStatusCode"],
                        err["Code"],
                        err["Message"],
                        self.provider,
                    )
                )
            logger.warning("Unexpected error: %s" % e)
            logger.warning("Skipping %s/%s" % (bucket_name, prefix))
            auth_error_messages.add(str(e))

    # could not auth on any bucket
    if not authenticated_objects:
        raise AuthenticationError(", ".join(auth_error_messages))

    # bucket_names_and_prefixes with unauthenticated items filtered out
    auth_bucket_names_and_prefixes = [
        p for p in bucket_names_and_prefixes if p[0] in authenticated_objects.keys()
    ]

    # download
    progress_callback.max_size = total_size
    progress_callback.reset()
    for bucket_name, prefix in auth_bucket_names_and_prefixes:
        try:
            s3_objects = authenticated_objects[bucket_name]

            for product_chunk in s3_objects.filter(Prefix=prefix):
                chunck_rel_path = self.get_chunck_dest_path(
                    product,
                    product_chunk,
                    build_safe=build_safe,
                    dir_prefix=prefix,
                )
                chunck_abs_path = os.path.join(product_local_path, chunck_rel_path)
                chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                if not os.path.isdir(chunck_abs_path_dir):
                    os.makedirs(chunck_abs_path_dir)

                if not os.path.isfile(chunck_abs_path):
                    product_chunk.Bucket().download_file(
                        product_chunk.key,
                        chunck_abs_path,
                        ExtraArgs=getattr(s3_objects, "_params", {}),
                        Callback=progress_callback,
                    )

        except AuthenticationError as e:
            logger.warning("Unexpected error: %s" % e)
            logger.warning("Skipping %s/%s" % (bucket_name, prefix))
        except ClientError as e:
            err = e.response["Error"]
            auth_messages = [
                "AccessDenied",
                "InvalidAccessKeyId",
                "SignatureDoesNotMatch",
            ]
            if err["Code"] in auth_messages and "key" in err["Message"].lower():
                raise AuthenticationError(
                    "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}".format(
                        e.response["ResponseMetadata"]["HTTPStatusCode"],
                        err["Code"],
                        err["Message"],
                        self.provider,
                    )
                )
            logger.warning("Unexpected error: %s" % e)
            logger.warning("Skipping %s/%s" % (bucket_name, prefix))

    # finalize safe product
    if build_safe and "S2_MSI" in product.product_type:
        self.finalize_s2_safe_product(product_local_path)
    # flatten directory structure
    elif flatten_top_dirs:
        tmp_product_local_path = "%s-tmp" % product_local_path
        for d, dirs, files in os.walk(product_local_path):
            if len(files) != 0:
                shutil.copytree(d, tmp_product_local_path)
                shutil.rmtree(product_local_path)
                os.rename(tmp_product_local_path, product_local_path)
                break

    # save hash/record file
    with open(record_filename, "w") as fh:
        fh.write(product.remote_location)
    logger.debug("Download recorded in %s", record_filename)

    return product_local_path
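
# Sketch of the idea behind the "longest common base path" computed above when
# several prefixes of the same bucket must share one authenticated listing
# (paths below are made up for the example):
def _common_prefix_example():
    prefixes = [
        "products/2021/S2B_MSIL1C_A/GRANULE",
        "products/2021/S2B_MSIL1C_A/metadata.xml",
    ]
    split = prefixes[0].split("/")
    common = ""
    for i in range(1, len(split)):
        candidate = "/".join(split[:i])
        # stop as soon as a candidate is no longer shared by every prefix
        if not all(candidate in p for p in prefixes):
            common = "/".join(split[: i - 1])
            break
        common = candidate
    # common == "products/2021/S2B_MSIL1C_A"
    return common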
def download(self, product, auth=None, progress_callback=None, **kwargs):
    """Download method for AWS S3 API.

    :param product: The EO product to download
    :type product: :class:`~eodag.api.product.EOProduct`
    :param auth: (optional) The configuration of a plugin of type Authentication
    :type auth: :class:`~eodag.config.PluginConfig`
    :param progress_callback: (optional) A method or a callable object
                              which takes a current size and a maximum size
                              as inputs and handles progress bar creation and
                              update to give the user feedback on the
                              download progress
    :type progress_callback: :class:`~eodag.utils.ProgressCallback` or None
    :return: The absolute path to the downloaded product in the local filesystem
    :rtype: str
    """
    product_conf = getattr(self.config, "products", {}).get(
        product.product_type, {}
    )
    build_safe = product_conf.get("build_safe", False)

    # product conf overrides provider conf for "flatten_top_dirs"
    flatten_top_dirs = product_conf.get(
        "flatten_top_dirs", getattr(self.config, "flatten_top_dirs", False)
    )

    # extra metadata needed for SAFE product
    if build_safe and "fetch_metadata" in product_conf.keys():
        fetch_format = product_conf["fetch_metadata"]["fetch_format"]
        update_metadata = product_conf["fetch_metadata"]["update_metadata"]
        fetch_url = product_conf["fetch_metadata"]["fetch_url"].format(
            **product.properties
        )
        if fetch_format == "json":
            logger.info("Fetching extra metadata from %s" % fetch_url)
            resp = requests.get(fetch_url)
            json_resp = resp.json()
            update_metadata = mtd_cfg_as_jsonpath(update_metadata)
            update_metadata = properties_from_json(json_resp, update_metadata)
            product.properties.update(update_metadata)
        else:
            logger.warning(
                "SAFE metadata fetch format %s not implemented" % fetch_format
            )

    # if assets are defined, use them instead of scanning product.location
    if hasattr(product, "assets"):
        bucket_names_and_prefixes = []
        for complementary_url in getattr(product, "assets", {}).values():
            bucket_names_and_prefixes.append(
                self.get_bucket_name_and_prefix(
                    product, complementary_url.get("href", "")
                )
            )
    else:
        bucket_names_and_prefixes = [self.get_bucket_name_and_prefix(product)]

    # add complementary urls
    for complementary_url_key in product_conf.get("complementary_url_key", []):
        bucket_names_and_prefixes.append(
            self.get_bucket_name_and_prefix(
                product, product.properties[complementary_url_key]
            )
        )

    # prepare download & create dirs
    product_local_path, record_filename = self._prepare_download(product, **kwargs)
    if not product_local_path or not record_filename:
        return product_local_path
    product_local_path = product_local_path.replace(".zip", "")
    # remove existing incomplete file
    if os.path.isfile(product_local_path):
        os.remove(product_local_path)
    # create product dest dir
    if not os.path.isdir(product_local_path):
        os.makedirs(product_local_path)

    with tqdm(
        total=len(bucket_names_and_prefixes),
        unit="parts",
        desc="Downloading product parts",
    ) as bar:
        for bucket_name, prefix in bucket_names_and_prefixes:
            try:
                # connect to aws s3
                access_key, access_secret = auth
                s3 = boto3.resource(
                    "s3",
                    aws_access_key_id=access_key,
                    aws_secret_access_key=access_secret,
                )
                bucket = s3.Bucket(bucket_name)

                total_size = sum(
                    [
                        p.size
                        for p in bucket.objects.filter(
                            Prefix=prefix, RequestPayer="requester"
                        )
                    ]
                )
                progress_callback.max_size = total_size
                for product_chunk in bucket.objects.filter(
                    Prefix=prefix, RequestPayer="requester"
                ):
                    chunck_rel_path = self.get_chunck_dest_path(
                        product,
                        product_chunk,
                        build_safe=build_safe,
                        dir_prefix=prefix,
                    )
                    chunck_abs_path = os.path.join(
                        product_local_path, chunck_rel_path
                    )
                    chunck_abs_path_dir = os.path.dirname(chunck_abs_path)
                    if not os.path.isdir(chunck_abs_path_dir):
                        os.makedirs(chunck_abs_path_dir)

                    if not os.path.isfile(chunck_abs_path):
                        bucket.download_file(
                            product_chunk.key,
                            chunck_abs_path,
                            ExtraArgs={"RequestPayer": "requester"},
                            Callback=progress_callback,
                        )
            except ClientError as e:
                err = e.response["Error"]
                auth_messages = ["InvalidAccessKeyId", "SignatureDoesNotMatch"]
                if err["Code"] in auth_messages and "key" in err["Message"].lower():
                    raise AuthenticationError(
                        "HTTP error {} returned\n{}: {}\nPlease check your credentials for {}".format(
                            e.response["ResponseMetadata"]["HTTPStatusCode"],
                            err["Code"],
                            err["Message"],
                            self.provider,
                        )
                    )
                logger.warning("Unexpected error: %s" % e)
                logger.warning("Skipping %s/%s" % (bucket_name, prefix))
            bar.update(1)

    # finalize safe product
    if build_safe and "S2_MSI" in product.product_type:
        self.finalize_s2_safe_product(product_local_path)
    # flatten directory structure
    elif flatten_top_dirs:
        tmp_product_local_path = "%s-tmp" % product_local_path
        for d, dirs, files in os.walk(product_local_path):
            if len(files) != 0:
                shutil.copytree(d, tmp_product_local_path)
                shutil.rmtree(product_local_path)
                os.rename(tmp_product_local_path, product_local_path)
                break

    # save hash/record file
    with open(record_filename, "w") as fh:
        fh.write(product.remote_location)
    logger.debug("Download recorded in %s", record_filename)

    return product_local_path
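
# Hedged sketch of the credential-error detection used in the except blocks of the
# download methods above, pulled out as a hypothetical helper (not part of the
# plugins; the exact list of codes differs slightly between the two variants above).
def _is_credentials_error(client_error):
    # ``client_error`` is expected to be a ``botocore.exceptions.ClientError``
    err = client_error.response["Error"]
    auth_codes = ["AccessDenied", "InvalidAccessKeyId", "SignatureDoesNotMatch"]
    return err["Code"] in auth_codes and "key" in err["Message"].lower()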