def __init__(self, path, row, times):
    """Initialize.

    Args:
        path (int): WRS-2 path of the Landsat tile to download (113-151).
        row (int): WRS-2 row of the Landsat tile to download (23-49).
        times (list): List of dates (ISO format) for which to download data.
    """
    if int(path) > 151 or int(path) < 113:
        raise ValueError("Wrong path: must be in the range 113-151.")
    if int(row) < 23 or int(row) > 49:
        raise ValueError("Wrong row: must be in the range 23-49.")
    self.path = "{:03d}".format(int(path))  # int() so string input also formats
    self.row = "{:03d}".format(int(row))
    self.pr = self.path + self.row
    self.times = times
    # login
    try:
        api.login('menglimeng', '302112aA', save=True, catalogId='EE')
    except Exception:
        print('Cannot login to usgs earthexplorer right now.')
    self.api_key = api._get_api_key(None)
    self.aws_l8_url = 'https://landsat-pds.s3.amazonaws.com/c1/L8/'
    self._dataset = 'LANDSAT_8_C1'
    # self._dataset = 'LANDSAT_ETM_C1'
    self._node = 'EE'
    # centroid coordinate of each tile
    self._centroidLonlat = read_json(os.path.join(
        os.path.dirname(__file__), 'aux_data', 'wrs2_centroid_china.json'))
def __init__(self, tiles, times):
    """Initialize.

    Args:
        tiles (list): List of MGRS tile IDs of the Sentinel-2 data to download.
        times (list): List of dates (ISO format) for which to download data.
    """
    self.tiles = tiles
    self.times = times
    # login
    try:
        api.login('menglimeng', '302112aA', save=True, catalogId='EE')
    except Exception:
        print('Cannot login to usgs earthexplorer right now.')
    self.api_key = api._get_api_key(None)
    self._dataset = 'SENTINEL_2A'
    self._node = 'EE'
    # centroid coordinate of each tile
    self._centroidLonlat = read_json(
        os.path.join(os.path.dirname(__file__), 'aux_data', 'mgrs_centroid.json'))
def cycle_token(username, password):
    credential_filepath = os.path.join(os.path.expanduser("~"), ".usgs")
    with open(credential_filepath) as f:
        credentials = json.load(f)
    created = datetime.strptime(credentials['created'], "%Y-%m-%dT%H:%M:%S.%f")
    # total_seconds() avoids the day wrap-around of timedelta.seconds
    token_lifetime = int((datetime.now() - created).total_seconds())
    approx_two_hours = 2 * 60 * 60 - 60
    click.echo('The token lifetime is {} seconds'.format(token_lifetime))
    if token_lifetime > approx_two_hours:
        api.logout()
        api.login(username, password)
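A minimal usage sketch for cycle_token, refreshing the cached token before a batch of queries; the environment-variable names and the search call are assumptions for illustration:

import os
from usgs import api

# Cycle the token if it is close to the ~2 hour expiry,
# then run a search against EarthExplorer.
cycle_token(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])
results = api.search('LANDSAT_8_C1', 'EE', max_results=10)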
def query_for_scenes(start_date, end_date, verbose=False, limit=None):
    if 'USGS_PASSWORD' in os.environ:
        if verbose:
            print('logging in...')
        api_key = api.login(os.environ['USGS_USERID'], os.environ['USGS_PASSWORD'])
        if verbose:
            print(' api_key = %s' % api_key)

    full_list = []
    list_offset = 0
    these_scenes = 'start'
    chunk_size = 500
    if limit is not None and limit < chunk_size:
        chunk_size = limit

    if verbose:
        print('search...')
    # Page through results until a short (final) chunk is returned.
    while these_scenes == 'start' or len(these_scenes) == chunk_size:
        these_scenes = api.search("LANDSAT_8", "EE",
                                  start_date=start_date, end_date=end_date,
                                  starting_number=1 + list_offset,
                                  max_results=chunk_size)
        if verbose:
            print('... %d scenes' % len(these_scenes))
        full_list += these_scenes
        list_offset += len(these_scenes)
        if limit is not None and list_offset >= limit:
            break

    scene_ids = [scene['entityId'] for scene in full_list]
    return scene_ids
def retry_login(retries=4, verbose=False):
    """ Retry USGS login multiple times, with exponential backoff between """
    if verbose:
        print('logging in...')
    sleep_time = 5
    for _ in range(retries + 1):
        try:
            api_key = api.login(os.environ['USGS_USERID'], os.environ['USGS_PASSWORD'])
            if verbose:
                print(' api_key = %s' % api_key)
            return api_key
        except USGSError:
            pass
        print('USGS login failed, retry in %s' % sleep_time)
        time.sleep(sleep_time)
        sleep_time *= backoff_factor(2)  # helper defined elsewhere; returns the backoff multiplier
    return None
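retry_login calls a backoff_factor() helper that is not shown. A plausible stand-in (an assumption, not the original helper) returns the base multiplier with random jitter so concurrent workers do not retry in lockstep:

import random

def backoff_factor(base):
    # Jitter the exponential-backoff multiplier by +/- 25% so that many
    # workers retrying at once do not hammer the API in sync.
    return base * random.uniform(0.75, 1.25)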
def poll_usgs():
    """ Check whether USGS has made any new scenes available.

    In the case of RT scenes, we check only a few days back. In the case of
    T1/T2 scenes we check 4 weeks back due to processing latencies.
    """
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'], save=False)['data']
    tier = os.environ['TIER']

    now = datetime.now()
    fmt = '%Y-%m-%d'
    days_prior = 4 if tier == 'RT' else 30
    start_date = (now - timedelta(days=days_prior)).strftime(fmt)
    end_date = now.strftime(fmt)

    # This field id represents the Collection Category
    where = {
        20510: tier
    }
    result = api.search('LANDSAT_8_C1', 'EE', start_date=start_date,
                        end_date=end_date, where=where, api_key=api_key)

    # Strangely, the entity id is still used to obtain a download url.
    entityIds = [scene['entityId'] for scene in result['data']['results']]
    return entityIds
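A sketch of how poll_usgs might be driven on a schedule; the TIER values come from the snippet, while the loop and interval are assumptions:

import os
import time

os.environ.setdefault('TIER', 'RT')  # or 'T1' / 'T2'
while True:
    entity_ids = poll_usgs()
    print('USGS reports %d scenes in the window' % len(entity_ids))
    time.sleep(15 * 60)  # re-poll every 15 minutes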
def usgs(self, scenes):
    """ Downloads the image from USGS """
    if not isinstance(scenes, list):
        raise Exception('Expected sceneIDs list')

    scene_objs = Scenes()
    # download from usgs if login information is provided
    if self.usgs_user and self.usgs_pass:
        try:
            api_key = api.login(self.usgs_user, self.usgs_pass)
        except USGSError as e:
            error_tree = ElementTree.fromstring(str(e.message))
            error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring",
                                         api.NAMESPACES).text
            raise USGSInventoryAccessMissing(error_text)

        for scene in scenes:
            download_urls = api.download('LANDSAT_8', 'EE', [scene], api_key=api_key)
            if download_urls:
                logger.info('Source: USGS EarthExplorer')
                scene_objs.add_with_files(scene, fetch(download_urls[0], self.download_dir))
            else:
                # scene is a single ID string here, so no join is needed
                raise RemoteFileDoesntExist(
                    '{0} not available on AWS S3, Google or USGS Earth Explorer'.format(scene))
        return scene_objs

    raise RemoteFileDoesntExist(
        '{0} not available on AWS S3 or Google Storage'.format(' - '.join(scenes)))
def create_snapshots():
    """ Run requests against USGS API for use in tests. """
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])

    # Dataset Fields
    response = api.dataset_fields("LANDSAT_8_C1", "EE", api_key=api_key)
    write_response(response, 'dataset-fields.json')

    # Datasets
    response = api.datasets(None, "EE")
    write_response(response, 'datasets.json')

    # Download
    response = api.download("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"], product='STANDARD')
    write_response(response, 'download.json')

    # Download Options
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'download-options.json')

    # Metadata
    response = api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'metadata.json')

    # Search
    response = api.search("LANDSAT_8_C1", "EE", start_date='20170401',
                          end_date='20170402', max_results=10)
    write_response(response, 'search.json')

    api.logout(api_key)
def login(username, password):
    """Login to the USGS EarthExplorer API.

    The key expires after 1 hr. This is a basic rewrite of the usgs login CLI
    with username and password prompts.
    """
    api_key = api.login(username, password)
    print(api_key)
def test_login():
    username = os.environ.get("USGS_USERNAME")
    password = os.environ.get("USGS_PASSWORD")
    api_key = api.login(username, password)
    assert isinstance(api_key, str)
def usgs_eros(self, scene, path):
    """ Downloads the image from USGS """
    # download from usgs if login information is provided
    if self.usgs_user and self.usgs_pass:
        try:
            api_key = api.login(self.usgs_user, self.usgs_pass)
        except USGSError as e:
            error_tree = ElementTree.fromstring(str(e.message))
            error_text = error_tree.find("SOAP-ENV:Body/SOAP-ENV:Fault/faultstring",
                                         api.NAMESPACES).text
            raise USGSInventoryAccessMissing(error_text)

        download_url = api.download('LANDSAT_8', 'EE', [scene], api_key=api_key)
        if download_url:
            self.output('Source: USGS EarthExplorer', normal=True, arrow=True)
            return self.fetch(download_url[0], path)

        raise RemoteFileDoesntExist(
            '%s is not available on AWS S3, Google or USGS Earth Explorer' % scene)

    raise RemoteFileDoesntExist(
        '%s is not available on AWS S3 or Google Storage' % scene)
def fetch_dswe_images(date, ll_coord, ur_coord, output_folder, user, password, force_login):
    """Download all DSWE images that fit the given criteria to the output folder
    if they are not already present. The coordinates must be in lon/lat degrees.
    """
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    CATALOG = 'EE'
    DATASET = 'SP_TILE_DSWE'

    # Only log in if our session expired (ugly function use to check!)
    if force_login or (not api._get_api_key(None)):  # pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        dummy_result = api.login(user, password, save=True, catalogId=CATALOG)

    print('Submitting EarthExplorer query...')
    results = api.search(DATASET, CATALOG, where={}, start_date=date, end_date=date,
                         ll=dict([('longitude', ll_coord[0]), ('latitude', ll_coord[1])]),
                         ur=dict([('longitude', ur_coord[0]), ('latitude', ur_coord[1])]),
                         max_results=12, extended=False)

    if not results['data']:
        raise Exception('Did not find any DSWE data that matched the Landsat file!')
    print('Found ' + str(len(results['data']['results'])) + ' matching files.')

    for scene in results['data']['results']:
        print('Found match: ' + scene['entityId'])

        fname = scene['entityId'] + '.tar'
        output_path = os.path.join(output_folder, fname)

        if os.path.exists(output_path):
            print('Already have image on disk!')
            continue

        r = api.download(DATASET, CATALOG, [scene['entityId']], product='DSWE')
        print(r)
        if not r['data']:
            raise Exception('Failed to get download URL!')
        url = r['data'][0]['url']

        cmd = ('wget "%s" --user %s --password %s -O %s'
               % (url, user, password, output_path))
        print(cmd)
        os.system(cmd)

        if not os.path.exists(output_path):
            raise Exception('Failed to download file ' + output_path)

    print('Finished downloading DSWE files.')
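An example call to fetch_dswe_images with a one-degree bounding box; the date, coordinates, folder, and credentials are placeholders:

fetch_dswe_images(date='2018-06-01',
                  ll_coord=(-95.0, 29.0),  # lon/lat of lower-left corner
                  ur_coord=(-94.0, 30.0),  # lon/lat of upper-right corner
                  output_folder='dswe_tiles',
                  user='myuser', password='mypass',
                  force_login=False)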
def get_API_key():
    user, password, _, _ = get_updd()
    try:
        return api.login(user, password, save=True, catalogId='EE')['data']
    except USGSError as e:
        record_error(e)
        raise e
def get_download_url(scene_root, verbose):
    if 'USGS_PASSWORD' in os.environ:
        if verbose:
            print('logging in...')
        api_key = api.login(os.environ['USGS_USERID'], os.environ['USGS_PASSWORD'])
        if verbose:
            print(' api_key = %s' % api_key)

    urls = api.download('LANDSAT_8', 'EE', [scene_root], 'STANDARD')
    return urls[0]
def poll_usgs():
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'], save=False)['data']

    now = datetime.now()
    fmt = '%Y%m%d'
    start_date = (now - timedelta(days=7)).strftime(fmt)
    end_date = now.strftime(fmt)

    where = {
        20510: 'T1'  # This field id represents the Collection Category
    }
    result = api.search('LANDSAT_8_C1', 'EE', start_date=start_date,
                        end_date=end_date, where=where, api_key=api_key)

    # Strangely, the entity id is still used to obtain a download url.
    return [scene['entityId'] for scene in result['data']['results']]
def login(username, password):
    api_key = api.login(username, password)
    click.echo(api_key)
def query(self, product_type=None, items_per_page=None, page=None, count=True, **kwargs):
    """Search for data on USGS catalogues

    .. versionchanged:: 2.2.0

        * Based on usgs library v0.3.0 which now uses M2M API. The library
          is used for both search & download

    .. versionchanged:: 1.0

        * ``product_type`` is no longer mandatory
    """
    product_type = kwargs.get("productType")
    if product_type is None:
        return [], 0
    try:
        api.login(
            self.config.credentials["username"],
            self.config.credentials["password"],
            save=True,
        )
    except USGSError:
        raise AuthenticationError("Please check your USGS credentials.") from None

    product_type_def_params = self.config.products.get(
        product_type, self.config.products[GENERIC_PRODUCT_TYPE])
    usgs_dataset = format_dict_items(product_type_def_params, **kwargs)["dataset"]

    start_date = kwargs.pop("startTimeFromAscendingNode", None)
    end_date = kwargs.pop("completionTimeFromAscendingNode", None)
    geom = kwargs.pop("geometry", None)
    footprint = {}
    if hasattr(geom, "bounds"):
        (
            footprint["lonmin"],
            footprint["latmin"],
            footprint["lonmax"],
            footprint["latmax"],
        ) = geom.bounds
    else:
        footprint = geom

    final = []
    if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
        lower_left = {
            "longitude": footprint["lonmin"],
            "latitude": footprint["latmin"],
        }
        upper_right = {
            "longitude": footprint["lonmax"],
            "latitude": footprint["latmax"],
        }
    else:
        lower_left, upper_right = None, None
    try:
        results = api.scene_search(
            usgs_dataset,
            start_date=start_date,
            end_date=end_date,
            ll=lower_left,
            ur=upper_right,
            max_results=items_per_page,
            starting_number=(1 + (page - 1) * items_per_page),
        )

        # Same method as in base.py, Search.__init__()
        # Prepare the metadata mapping
        # Do a shallow copy, the structure is flat enough for this to be sufficient
        metas = DEFAULT_METADATA_MAPPING.copy()
        # Update the defaults with the mapping value. This will add any new key
        # added by the provider mapping that is not in the default metadata.
        # A deepcopy is done to prevent self.config.metadata_mapping from being
        # modified when metas[metadata] is a list and is modified
        metas.update(copy.deepcopy(self.config.metadata_mapping))
        metas = mtd_cfg_as_jsonpath(metas)

        for result in results["data"]["results"]:
            result["productType"] = usgs_dataset
            product_properties = properties_from_json(result, metas)
            final.append(
                EOProduct(
                    productType=product_type,
                    provider=self.provider,
                    properties=product_properties,
                    geometry=footprint,
                ))
    except USGSError as e:
        logger.warning("Product type %s does not exist on USGS EE catalog", usgs_dataset)
        logger.warning("Skipping error: %s", e)
    api.logout()

    if final:
        # parse total_results
        path_parsed = parse(self.config.pagination["total_items_nb_key_path"])
        total_results = path_parsed.find(results["data"])[0].value
    else:
        total_results = 0

    return final, total_results
def download(self, product, auth=None, progress_callback=None, **kwargs):
    """Download data from USGS catalogues"""
    fs_path, record_filename = self._prepare_download(
        product, outputs_extension=".tar.gz", **kwargs)
    if not fs_path or not record_filename:
        return fs_path

    # progress bar init
    if progress_callback is None:
        progress_callback = get_progress_callback()
    progress_callback.desc = product.properties.get("id", "")
    progress_callback.position = 1

    try:
        api.login(
            self.config.credentials["username"],
            self.config.credentials["password"],
            save=True,
        )
    except USGSError:
        raise AuthenticationError("Please check your USGS credentials.") from None

    download_options = api.download_options(
        product.properties["productType"], product.properties["id"])

    try:
        product_ids = [
            p["id"] for p in download_options["data"]
            if p["downloadSystem"] == "dds"
        ]
    except KeyError as e:
        raise NotAvailableError("%s not found in %s's products"
                                % (e, product.properties["id"]))

    if not product_ids:
        raise NotAvailableError("No USGS products found for %s" % product.properties["id"])

    req_urls = []
    for product_id in product_ids:
        download_request = api.download_request(
            product.properties["productType"], product.properties["id"], product_id)
        try:
            req_urls.extend(
                [x["url"] for x in download_request["data"]["preparingDownloads"]])
        except KeyError as e:
            raise NotAvailableError("%s not found in %s download_request"
                                    % (e, product.properties["id"]))

    if len(req_urls) > 1:
        logger.warning(
            "%s usgs products found for %s. Only first will be downloaded"
            % (len(req_urls), product.properties["id"]))
    elif not req_urls:
        raise NotAvailableError("No usgs request url was found for %s"
                                % product.properties["id"])

    req_url = req_urls[0]
    progress_callback.reset()
    with requests.get(req_url, stream=True) as stream:
        try:
            stream.raise_for_status()
        except HTTPError:
            import traceback as tb
            logger.error("Error while getting resource :\n%s", tb.format_exc())
        else:
            stream_size = int(stream.headers.get("content-length", 0))
            progress_callback.max_size = stream_size
            progress_callback.reset()
            with open(fs_path, "wb") as fhandle:
                for chunk in stream.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        fhandle.write(chunk)
                        progress_callback(len(chunk), stream_size)

    with open(record_filename, "w") as fh:
        fh.write(product.properties["downloadLink"])
    logger.debug("Download recorded in %s", record_filename)

    api.logout()

    # Check that the downloaded file is really a tar file
    if not tarfile.is_tarfile(fs_path):
        logger.warning(
            "Downloaded product is not a tar File. Please check its file type before using it")
        new_fs_path = fs_path[:fs_path.index(".tar.gz")]
        shutil.move(fs_path, new_fs_path)
        return new_fs_path
    return self._finalize(fs_path, outputs_extension=".tar.gz", **kwargs)
from usgs import api
import json
import os
import sys

usgs_api_key = api.login(os.environ["EARTH_EXPLORER_USERNAME"],
                         os.environ["EARTH_EXPLORER_PASSWORD"])

scenes = \
    api.search(
        'EO1_HYP_PUB', 'EE',
        api_key=usgs_api_key,
        start_date='2017-02-01', end_date='2017-03-01',
        extended=True)

with open(sys.argv[1], "w") as f:
    f.write(json.dumps(scenes))
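A companion reader for the JSON file the script above writes, assuming the 'data' -> 'results' response layout the other snippets in this collection rely on (the file name stands in for whatever was passed as sys.argv[1]):

import json

with open('eo1_scenes.json') as f:  # the path given as sys.argv[1] above
    scenes = json.load(f)
print('Saved %d EO-1 Hyperion scenes' % len(scenes['data']['results']))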
def login(username, password):
    click.echo(api.login(username, password))
    i: os.path.join(barrier_indir, 'Paths_{}_coded_clean_UTM20S'.format(i))
    for i in analysis_years
}

###---------------------------------------- E. PREPARE DATA ---------------------------------------------------------###
#-----------------------------------------------------------------------------------------------------------------------
### 1. Get and pre-process Landsat imagery
# Initialize a new API instance and get an access key
# The user credentials that will be used to authenticate access to the data
with open("configs.json") as json_data_file:  # https://martin-thoma.com/configuration-files-in-python/
    authdat = json.load(json_data_file)

# Get temporary API key
usgs_api_key = api.login(str(authdat["username"]), str(authdat["password"]),
                         save=False, catalogId='EE')['data']

# Get list of scenes from Landsat 7 for 2000
lss_LC7_2000 = api.search(dataset='LANDSAT_ETM_C1', node='EE',
                          ll={"longitude": pelleextent_wgs84.XMin,
                              "latitude": pelleextent_wgs84.YMin},
                          ur={"longitude": pelleextent_wgs84.XMax,
                              "latitude": pelleextent_wgs84.YMax},
                          start_date='2000-01-01',
                          end_date='2000-12-31',
# coding=utf8
import datetime

from utils import get_wrs as wrs
from usgs import api
import pg_database as db

username = '******'
password = '******'
api.login(username, password)

today = datetime.datetime.now()


def get_today():
    # Get today's date to work with
    today = datetime.datetime.now()
    year = today.strftime("%Y")
    day = today.timetuple()
    if len(str(day.tm_yday)) == 1:
        julian_day = '00{}'.format(day.tm_yday)
    elif len(str(day.tm_yday)) == 2:
        julian_day = '0{}'.format(day.tm_yday)
    else:
        # three-digit day of year needs no padding; without this branch
        # julian_day would be unassigned for days >= 100
        julian_day = str(day.tm_yday)
    return (year + julian_day)


def get_wrs2(latitude, longitude):
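The padding branches in get_today() can also be collapsed entirely: strftime('%j') already yields the zero-padded day of year, so the whole YYYYDDD string is one call (a simplification sketch, not the original code):

import datetime

def get_today_compact():
    # '%Y%j' -> e.g. '2024005' for Jan 5 or '2024137' for May 16.
    return datetime.datetime.now().strftime('%Y%j')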
def query(self, product_type=None, **kwargs):
    """Search for data on USGS catalogues

    .. versionchanged:: 1.0

        * ``product_type`` is no longer mandatory
    """
    product_type = kwargs.get("productType")
    if product_type is None:
        return [], 0
    api.login(
        self.config.credentials["username"],
        self.config.credentials["password"],
        save=True,
    )
    usgs_dataset = self.config.products[product_type]["dataset"]
    usgs_catalog_node = self.config.products[product_type]["catalog_node"]
    start_date = kwargs.pop("startTimeFromAscendingNode", None)
    end_date = kwargs.pop("completionTimeFromAscendingNode", None)
    footprint = kwargs.pop("geometry", None)

    # Configuration to generate the download url of search results
    result_summary_pattern = re.compile(
        r"^ID: .+, Acquisition Date: .+, Path: (?P<path>\d+), Row: (?P<row>\d+)$"  # noqa
    )
    # See https://pyformat.info/, on section "Padding and aligning strings" to
    # understand {path:0>3} and {row:0>3}.
    # It roughly means: 'if the string that will be passed as "path" has length < 3,
    # prepend as much "0"s as needed to reach length 3' and same for "row"
    dl_url_pattern = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz"

    final = []
    if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
        lower_left = {
            "longitude": footprint["lonmin"],
            "latitude": footprint["latmin"],
        }
        upper_right = {
            "longitude": footprint["lonmax"],
            "latitude": footprint["latmax"],
        }
    else:
        lower_left, upper_right = None, None
    try:
        results = api.search(
            usgs_dataset,
            usgs_catalog_node,
            start_date=start_date,
            end_date=end_date,
            ll=lower_left,
            ur=upper_right,
        )
        for result in results["data"]["results"]:
            r_lower_left = result["spatialFootprint"]["coordinates"][0][0]
            r_upper_right = result["spatialFootprint"]["coordinates"][0][2]
            summary_match = result_summary_pattern.match(result["summary"]).groupdict()
            result["geometry"] = geometry.box(
                r_lower_left[0], r_lower_left[1], r_upper_right[0], r_upper_right[1])

            # Same method as in base.py, Search.__init__()
            # Prepare the metadata mapping
            # Do a shallow copy, the structure is flat enough for this to be sufficient
            metas = DEFAULT_METADATA_MAPPING.copy()
            # Update the defaults with the mapping value. This will add any new key
            # added by the provider mapping that is not in the default metadata.
            # A deepcopy is done to prevent self.config.metadata_mapping from being
            # modified when metas[metadata] is a list and is modified
            metas.update(copy.deepcopy(self.config.metadata_mapping))
            metas = mtd_cfg_as_jsonpath(metas)

            result["productType"] = usgs_dataset
            product_properties = properties_from_json(result, metas)
            if getattr(self.config, "product_location_scheme", "https") == "file":
                product_properties["downloadLink"] = dl_url_pattern.format(base_url="file://")
            else:
                product_properties["downloadLink"] = dl_url_pattern.format(
                    base_url=self.config.google_base_url.rstrip("/"),
                    entity=result["entityId"],
                    **summary_match)
            final.append(
                EOProduct(
                    productType=product_type,
                    provider=self.provider,
                    properties=product_properties,
                    geometry=footprint,
                ))
    except USGSError as e:
        logger.debug("Product type %s does not exist on catalogue %s",
                     usgs_dataset, usgs_catalog_node)
        logger.debug("Skipping error: %s", e)
    api.logout()
    return final, len(final)
def main(argsIn):  # pylint: disable=R0914,R0912
    try:
        usage = "usage: fetch_hdds_images.py [options]"
        parser = argparse.ArgumentParser(usage=usage)

        parser.add_argument("--output-folder", dest="output_folder", required=True,
                            help="Download files to this folder.")
        parser.add_argument("--user", dest="user", required=True,
                            help="User name for EarthExplorer website.")
        parser.add_argument("--password", dest="password", required=True,
                            help="Password for EarthExplorer website.")
        parser.add_argument("--force-login", action="store_true",
                            dest="force_login", default=False,
                            help="Don't reuse the cached EE API key if present.")
        parser.add_argument("--refetch-datasets", action="store_true",
                            dest="refetch_datasets", default=False,
                            help="Force a refetch of the dataset list.")
        parser.add_argument("--refetch-scenes", action="store_true",
                            dest="refetch_scenes", default=False,
                            help="Force refetches of scene lists for each dataset.")
        parser.add_argument("--image-list-path", dest="image_list_path", default=None,
                            help="Path to text file containing list of image IDs to download, one per line.")
        parser.add_argument("--event-name", dest="event_name", default=None,
                            help="Only download images from this event.")

        options = parser.parse_args(argsIn)
    except argparse.ArgumentError:
        print(usage)
        return -1

    if options.output_folder and not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)

    images_to_use = []
    if options.image_list_path:
        with open(options.image_list_path, 'r') as f:
            for line in f:
                images_to_use.append(line.strip())

    # Only log in if our session expired (ugly function use to check!)
    if options.force_login or (not api._get_api_key(None)):  # pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        api.login(options.user, options.password, save=True, catalogId=CATALOG)  # pylint: disable=W0612
        print(api._get_api_key(None))  # pylint: disable=W0212
        # raise Exception('DEBUG')  # leftover debug stop; disabled so the run can proceed

    # Retrieve all of the available datasets
    dataset_list = get_dataset_list(options)
    print('Found ' + str(len(dataset_list)) + ' useable datasets.')
    #raise Exception('debug')

    # Don't think we need to do this!
    #get_dataset_fields(dataset_list)

    # TODO: Work through some errors.
    counter = 0
    for (dataset, full_name) in dataset_list:
        counter = counter + 1
        #if counter == 1:
        #    continue

        if options.event_name:  # Only download images from the specified event
            if options.event_name.lower() not in full_name.lower():
                continue

        dataset_folder = os.path.join(options.output_folder, full_name)
        scene_list_path = os.path.join(dataset_folder, 'scene_list.dat')
        done_flag_path = os.path.join(dataset_folder, 'done.flag')

        if not os.path.exists(dataset_folder):
            os.mkdir(dataset_folder)

        if os.path.exists(done_flag_path) and not options.refetch_scenes:
            print('Skipping completed dataset ' + full_name)
            continue

        print('--> Search scenes for: ' + full_name)

        BATCH_SIZE = 10000
        if not os.path.exists(scene_list_path) or options.refetch_scenes:
            # Request the scene list from USGS
            #details = {'Agency - Platform - Vendor':'WORLDVIEW', 'Sensor Type':'MS'}
            #details = {'sensor_type':'MS'}
            details = {}  # TODO: How do these work??
            # Large sets of results require multiple queries in order to get all of the data
            done = False
            error = False
            all_scenes = []  # Accumulate all scene data here
            while not done:
                print('Searching with start offset = ' + str(len(all_scenes)))
                results = api.search(dataset, CATALOG, where=details,
                                     max_results=BATCH_SIZE,
                                     starting_number=len(all_scenes), extended=False)

                if 'results' not in results['data']:
                    print('ERROR: Failed to get any results for dataset: ' + full_name)
                    error = True
                    break
                if len(results['data']['results']) < BATCH_SIZE:
                    done = True
                all_scenes += results['data']['results']

            if error:
                continue

            results['data']['results'] = all_scenes

            # Cache the results to disk
            with open(scene_list_path, 'wb') as f:
                pickle.dump(results, f)
        else:
            # Load the results from the cache file
            with open(scene_list_path, 'rb') as f:
                results = pickle.load(f)

        print('Got ' + str(len(results['data']['results'])) + ' scene results.')

        for scene in results['data']['results']:
            fail = False
            REQUIRED_PARTS = ['displayId', 'summary', 'entityId']
            for p in REQUIRED_PARTS:
                if (p not in scene) or (not scene[p]):
                    print('scene object is missing element: ' + p)
                    print(scene)
                    fail = True
            if fail:
                continue

            # If image list was provided skip other image names
            if images_to_use and (scene['displayId'] not in images_to_use):
                continue

            # Figure out the downloaded file path for this image
            file_name = scene['displayId'] + '.zip'
            output_path = os.path.join(dataset_folder, file_name)
            if not os.path.exists(dataset_folder):
                os.mkdir(dataset_folder)
            if os.path.exists(output_path):
                continue  # Already have the file!

            # Check if this is one of the sensors we are interested in.
            DESIRED_SENSORS = [('worldview', 'hp'), ('worldview', 'msi')]  # TODO: Add more
            parts = scene['summary'].lower().split(',')
            platform = None
            sensor = None
            for part in parts:
                if 'platform:' in part:
                    platform = part.split(':')[1].strip()
                if 'sensor:' in part:
                    sensor = part.split(':')[1].strip()
            if (not platform) or (not sensor):
                raise Exception('Unknown sensor: ' + scene['summary'])
            if (platform, sensor) not in DESIRED_SENSORS:
                print((platform, sensor))
                print('Undesired sensor: ' + scene['summary'])
                continue

            # Investigate the number of bands
            PLATFORM_BAND_COUNTS = {'worldview': 8, 'TODO': 1}
            min_num_bands = PLATFORM_BAND_COUNTS[platform]
            num_bands = None
            try:
                meta = api.metadata(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error fetching metadata for dataset = ' + dataset +
                      ', entity = ' + scene['entityId'])
                continue
            try:
                for m in meta['data'][0]['metadataFields']:
                    if m['fieldName'] == 'Number of bands':
                        num_bands = int(m['value'])
                        break
                if not num_bands:
                    raise KeyError()  # Treat like the except case
                if num_bands < min_num_bands:
                    print('Skipping %s, too few bands: %d' % (scene['displayId'], num_bands))
                    continue
            except KeyError:
                print('Unable to perform metadata check!')
                print(meta)

            # Make sure we know which file option to download
            try:
                types = api.download_options(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error decoding download options!')
                continue

            if not types['data'] or not types['data'][0]:
                raise Exception('Need to handle types: ' + str(types))
            ready = False
            download_type = 'STANDARD'  # TODO: Does this ever change?
            for o in types['data'][0]['downloadOptions']:
                if o['available'] and o['downloadCode'] == download_type:
                    ready = True
                    break
            if not ready:
                raise Exception('Missing download option for scene: ' + str(types))

            # Get the download URL of the file we want.
            r = api.download(dataset, CATALOG, [scene['entityId']], product=download_type)
            try:
                url = r['data'][0]['url']
            except Exception as e:
                raise Exception('Failed to get download URL from result: ' + str(r)) from e
            print(scene['summary'])

            # Finally download the data!
            cmd = ('wget "%s" --user %s --password %s -O %s'
                   % (url, options.user, options.password, output_path))
            print(cmd)
            os.system(cmd)
            #raise Exception('DEBUG')

        print('Finished processing dataset: ' + full_name)
        os.system('touch ' + done_flag_path)  # Mark this dataset as finished
        #raise Exception('DEBUG')
        #if not os.path.exists(output_path):
        #    raise Exception('Failed to download file ' + output_path)

    print('Finished downloading HDDS files!')
    # Can just let this time out
    #api.logout()
    return 0