def query_for_scenes(start_date, end_date, verbose=False, limit=None):
    if 'USGS_PASSWORD' in os.environ:
        api_key = retry_login(verbose=verbose)
        if not api_key:
            print("Failed to authenticate with USGS servers")
            sys.exit(1)

    full_list = []
    list_offset = 0
    these_scenes = 'start'
    chunk_size = 500
    if limit is not None and limit < chunk_size:
        chunk_size = limit

    if verbose:
        print('search...')

    while these_scenes == 'start' or len(these_scenes) == chunk_size:
        these_scenes = api.search("LANDSAT_8", "EE",
                                  start_date=start_date, end_date=end_date,
                                  starting_number=1 + list_offset,
                                  max_results=chunk_size)
        if verbose:
            print('... %d scenes' % len(these_scenes))
        full_list += these_scenes
        list_offset += len(these_scenes)
        if limit is not None and list_offset >= limit:
            break

    scene_ids = [scene['entityId'] for scene in full_list]
    return scene_ids
def poll_usgs(): """ Check whether USGS has made any new scenes available. In the case of RT scenes, we check only a few days back. In the case of T1/T2 scenes we check 4 weeks back due to processing latencies. """ api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'], save=False)['data'] tier = os.environ['TIER'] now = datetime.now() fmt = '%Y-%m-%d' days_prior = 4 if tier == 'RT' else 30 start_date = (now - timedelta(days=days_prior)).strftime(fmt) end_date = now.strftime(fmt) # This field id represents the Collection Category where = { 20510: tier } result = api.search( 'LANDSAT_8_C1', 'EE', start_date=start_date, end_date=end_date, where=where, api_key=api_key) # Strangely, the entity id is still used to obtain a download url. entityIds = [ scene['entityId'] for scene in result['data']['results'] ] return entityIds
def search_scenes(dataset, latitud, longitud):
    # Set the Hyperion and Landsat 8 dataset
    #hyperion_dataset = 'EO1_HYP_PUB'
    #landsat8_dataset = 'LANDSAT_8'

    # Set the EarthExplorer catalog
    node = 'EE'

    # Set the scene ids
    scenes = api.search(dataset, node,
                        lat=latitud, lng=longitud, distance=100,
                        ll=None, ur=None,
                        start_date='2017-02-15', end_date=today.strftime('%Y-%m-%d'),
                        where=None, max_results=50000, starting_number=1,
                        sort_order="DESC", extended=False, api_key=None)

    scenes_list = []
    for scene in scenes:
        scenes_list.append(scene)

    return scenes_list
def create_snapshots():
    """
    Run requests against USGS API for use in tests.
    """
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])

    # Dataset Fields
    response = api.dataset_fields("LANDSAT_8_C1", "EE", api_key=api_key)
    write_response(response, 'dataset-fields.json')

    # Datasets
    response = api.datasets(None, "EE")
    write_response(response, 'datasets.json')

    # Download
    response = api.download("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"], product='STANDARD')
    write_response(response, 'download.json')

    # Download Options
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'download-options.json')

    # Metadata
    response = api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'metadata.json')

    # Search
    response = api.search("LANDSAT_8_C1", "EE", start_date='20170401', end_date='20170402', max_results=10)
    write_response(response, 'search.json')

    api.logout(api_key)
def _query_by_point(self, lon, lat, date_start, date_end):
    """
    Query by point using usgs api.

    Args:
        lon, lat (float): Coordinates.
        date_start, date_end (str): The start and end date.

    Returns:
        query object.
    """
    return api.search(
        self._dataset,
        self._node,
        lat=lat,
        lng=lon,
        distance=100,
        # ll={ "longitude": 108.963791 ,
        #      "latitude": 19.845140},
        # ur={ "longitude": 110.266751 ,
        #      "latitude": 20.831747},
        start_date=date_start,
        end_date=date_end,
        api_key=self.api_key,
    )
def query_for_scenes(start_date, end_date, verbose=False, limit=None):
    if 'USGS_PASSWORD' in os.environ:
        if verbose:
            print('logging in...')
        api_key = api.login(os.environ['USGS_USERID'], os.environ['USGS_PASSWORD'])
        if verbose:
            print('  api_key = %s' % api_key)

    full_list = []
    list_offset = 0
    these_scenes = 'start'
    chunk_size = 500
    if limit is not None and limit < chunk_size:
        chunk_size = limit

    if verbose:
        print('search...')

    while these_scenes == 'start' or len(these_scenes) == chunk_size:
        these_scenes = api.search("LANDSAT_8", "EE",
                                  start_date=start_date, end_date=end_date,
                                  starting_number=1 + list_offset,
                                  max_results=chunk_size)
        if verbose:
            print('... %d scenes' % len(these_scenes))
        full_list += these_scenes
        list_offset += len(these_scenes)
        if limit is not None and list_offset >= limit:
            break

    scene_ids = [scene['entityId'] for scene in full_list]
    return scene_ids
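# Hedged usage sketch for the paginated helper above; the dates are placeholder
# values and USGS_USERID / USGS_PASSWORD must already be set in the environment
# for the login branch to run.
scene_ids = query_for_scenes('2017-04-01', '2017-04-30', verbose=True, limit=1000)
print('%d scene ids collected' % len(scene_ids))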
def search(dataset, node, aoi, start_date, end_date, longitude, latitude,
           distance, lower_left, upper_right, where, api_key, geojson):

    node = get_node(dataset, node)

    if aoi == "-":
        src = click.open_file('-').readlines()
        aoi = json.loads(''.join([line.strip() for line in src]))

        bbox = map(get_bbox, aoi.get('features'))[0]
        lower_left = bbox[0:2]
        upper_right = bbox[2:4]

    if where:
        # Query the dataset fields endpoint for queryable fields
        fields = api.dataset_fields(dataset, node)

        def format_fieldname(s):
            return ''.join(c for c in s if c.isalnum()).lower()

        field_lut = {format_fieldname(field['name']): field['fieldId'] for field in fields}
        where = {field_lut[format_fieldname(k)]: v for k, v in where if format_fieldname(k) in field_lut}

    if lower_left:
        lower_left = dict(zip(['longitude', 'latitude'], lower_left))
        upper_right = dict(zip(['longitude', 'latitude'], upper_right))

    data = api.search(dataset, node, lat=latitude, lng=longitude,
                      distance=distance, ll=lower_left, ur=upper_right,
                      start_date=start_date, end_date=end_date, where=where,
                      api_key=api_key)

    if geojson:
        features = map(to_geojson_feature, data)
        data = {'type': 'FeatureCollection', 'features': features}

    print(json.dumps(data))
def fetch_dswe_images(date, ll_coord, ur_coord, output_folder, user, password, force_login):
    """Download all DSWE images that fit the given criteria to the output folder
       if they are not already present. The coordinates must be in lon/lat degrees.
    """

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    CATALOG = 'EE'
    DATASET = 'SP_TILE_DSWE'

    # Only log in if our session expired (ugly function use to check!)
    if force_login or (not api._get_api_key(None)): #pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        dummy_result = api.login(user, password, save=True, catalogId=CATALOG)

    print('Submitting EarthExplorer query...')
    results = api.search(DATASET, CATALOG, where={},
                         start_date=date, end_date=date,
                         ll=dict([('longitude', ll_coord[0]), ('latitude', ll_coord[1])]),
                         ur=dict([('longitude', ur_coord[0]), ('latitude', ur_coord[1])]),
                         max_results=12, extended=False)

    if not results['data']:
        raise Exception('Did not find any DSWE data that matched the Landsat file!')
    print('Found ' + str(len(results['data']['results'])) + ' matching files.')

    for scene in results['data']['results']:
        print('Found match: ' + scene['entityId'])

        fname = scene['entityId'] + '.tar'
        output_path = os.path.join(output_folder, fname)

        if os.path.exists(output_path):
            print('Already have image on disk!')
            continue

        r = api.download(DATASET, CATALOG, [scene['entityId']], product='DSWE')
        print(r)
        if not r['data']:
            raise Exception('Failed to get download URL!')
        url = r['data'][0]['url']
        cmd = ('wget "%s" --user %s --password %s -O %s'
               % (url, user, password, output_path))
        print(cmd)
        os.system(cmd)

        if not os.path.exists(output_path):
            raise Exception('Failed to download file ' + output_path)

    print('Finished downloading DSWE files.')
def search(dataset, node, aoi, start_date, end_date, longitude, latitude,
           distance, lower_left, upper_right, where, geojson, extended, api_key):

    node = get_node(dataset, node)

    if aoi == "-":
        src = click.open_file('-')
        if not src.isatty():
            lines = src.readlines()
            if len(lines) > 0:
                aoi = json.loads(''.join([line.strip() for line in lines]))

                bbox = map(get_bbox, aoi.get('features') or [aoi])[0]
                lower_left = bbox[0:2]
                upper_right = bbox[2:4]

    if where:
        # Query the dataset fields endpoint for queryable fields
        resp = api.dataset_fields(dataset, node)

        def format_fieldname(s):
            return ''.join(c for c in s if c.isalnum()).lower()

        field_lut = {format_fieldname(field['name']): field['fieldId'] for field in resp['data']}
        where = {field_lut[format_fieldname(k)]: v for k, v in where if format_fieldname(k) in field_lut}

    if lower_left:
        lower_left = dict(zip(['longitude', 'latitude'], lower_left))
        upper_right = dict(zip(['longitude', 'latitude'], upper_right))

    result = api.search(dataset, node, lat=latitude, lng=longitude,
                        distance=distance, ll=lower_left, ur=upper_right,
                        start_date=start_date, end_date=end_date, where=where,
                        extended=extended, api_key=api_key)

    if geojson:
        result = to_geojson(result)

    print(json.dumps(result))
def test_search(): expected_keys = ["totalHits", "firstRecord", "nextRecord", "results", "numberReturned", "lastRecord"] fieldid = 20516 # TODO: compute by finding 'WRS Path' in result of api.dataset_fields("LANDSAT_8_C1", "EE") response = api.search("LANDSAT_8_C1", "EE", start_date='20170401', end_date='20170402', where={fieldid: '032'}, max_results=10) assert check_root_keys(response) return assert len(response['data']["results"]) == 10 for item in response['data']['results']: pr = item['displayId'].split('_')[2] assert pr[-3:] == '032' data = response['data'] for key in expected_keys: assert key in data
def test_search(): expected_keys = [ "totalHits", "firstRecord", "nextRecord", "results", "numberReturned", "lastRecord" ] response = api.search("LANDSAT_8_C1", "EE", start_date='20170401', end_date='20170402', max_results=10) assert check_root_keys(response) assert len(response['data']["results"]) == 10 data = response['data'] for key in expected_keys: assert key in data
def search(dataset, node, aoi, start_date, end_date, longitude, latitude,
           distance, lower_left, upper_right, where, max_results, geojson,
           extended, api_key):

    node = get_node(dataset, node)

    if aoi == "-":
        src = click.open_file('-')
        if not src.isatty():
            lines = src.readlines()
            if len(lines) > 0:
                aoi = json.loads(''.join([line.strip() for line in lines]))

                bbox = map(get_bbox, aoi.get('features') or [aoi])[0]
                lower_left = bbox[0:2]
                upper_right = bbox[2:4]

    if where:
        # Query the dataset fields endpoint for queryable fields
        resp = api.dataset_fields(dataset, node)

        def format_fieldname(s):
            return ''.join(c for c in s if c.isalnum()).lower()

        field_lut = {format_fieldname(field['name']): field['fieldId'] for field in resp['data']}
        where = {field_lut[format_fieldname(k)]: v for k, v in where if format_fieldname(k) in field_lut}

    if lower_left:
        lower_left = dict(zip(['longitude', 'latitude'], lower_left))
        upper_right = dict(zip(['longitude', 'latitude'], upper_right))

    result = api.search(
        dataset, node,
        lat=latitude, lng=longitude, distance=distance,
        ll=lower_left, ur=upper_right,
        start_date=start_date, end_date=end_date,
        where=where, max_results=max_results,
        extended=extended, api_key=api_key)

    if geojson:
        result = to_geojson(result)

    print(json.dumps(result))
def querySceneLists(collection, ll, ur, start_date, end_date, api_key):
    # TODO: option to order with just a point. Look back at usgs.api.search options
    """ Send a request to earth explorer api

    Args:
        collection (string) one of 'LSR_LANDSAT_ETM_COMBINED', 'LSR_LANDSAT_8', and 'LSR_LANDSAT_TM'
        ll (dict) lowerLeft corner dict with longitude and latitude keys
        ur (dict) upperRight corner dict with longitude and latitude keys
        dates (strings) with '%Y-%m-%d' format
        api_key (string) usgs api key (retrieve it using the 'usgs login' command line)
    """
    scenes = api.search(collection, 'EE',
                        ll=ll,
                        ur=ur,
                        start_date=start_date,
                        end_date=end_date,
                        api_key=api_key)

    scene_list = []
    for scene in scenes:
        scene_list.append(scene['entityId'])

    return scene_list
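# Hedged usage sketch for querySceneLists() above. The bounding box and dates are
# placeholder values; depending on the usgs client version, api.login() either
# returns the key directly or wraps it under ['data'] (both forms appear in the
# snippets in this file).
api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])
scene_ids = querySceneLists('LSR_LANDSAT_8',
                            ll={'longitude': 4.0, 'latitude': 43.0},
                            ur={'longitude': 5.0, 'latitude': 44.0},
                            start_date='2017-01-01',
                            end_date='2017-03-01',
                            api_key=api_key)
print(len(scene_ids))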
def retry_search(start_date, end_date, starting_number, max_results, retries=4, verbose=False):
    """
    Retry USGS search multiple times, with exponential backoff between.
    Required due to rate-limits imposed by USGS.
    """
    if verbose:
        logging.info("searching ....")

    sleep_time = 5
    for _ in range(retries + 1):
        try:
            scenes = api.search("LANDSAT_8", "EE",
                                start_date=start_date, end_date=end_date,
                                starting_number=starting_number,
                                max_results=max_results)
            return scenes
        except USGSError:
            logging.info("USGS search failed. Retry in %s" % sleep_time)
            time.sleep(sleep_time)
            sleep_time *= backoff_factor(2)

    return None
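# backoff_factor() is not defined in the snippet above; this is a minimal sketch
# of what such a helper might look like (an assumption, not the original
# implementation): multiply the sleep time by the base with a little random
# jitter so concurrent workers do not retry in lockstep.
import random

def backoff_factor(base):
    return base * (1 + random.random() * 0.1)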
def test_search(): expected_keys = [ "totalHits", "firstRecord", "nextRecord", "results", "numberReturned", "lastRecord" ] fieldid = 20516 # TODO: compute by finding 'WRS Path' in result of api.dataset_fields("LANDSAT_8_C1", "EE") response = api.search("LANDSAT_8_C1", "EE", start_date='20170401', end_date='20170402', where={fieldid: '032'}, max_results=10) assert check_root_keys(response) return assert len(response['data']["results"]) == 10 for item in response['data']['results']: pr = item['displayId'].split('_')[2] assert pr[-3:] == '032' data = response['data'] for key in expected_keys: assert key in data
def poll_usgs():
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'], save=False)['data']

    now = datetime.now()
    fmt = '%Y%m%d'
    start_date = (now - timedelta(days=7)).strftime(fmt)
    end_date = now.strftime(fmt)

    where = {
        20510: 'T1'  # This field id represents the Collection Category
    }
    result = api.search('LANDSAT_8_C1', 'EE',
                        start_date=start_date, end_date=end_date,
                        where=where, api_key=api_key)

    # Strangely, the entity id is still used to obtain a download url.
    return [scene['entityId'] for scene in result['data']['results']]
          ) as json_data_file:  # https://martin-thoma.com/configuration-files-in-python/
    authdat = json.load(json_data_file)

# Get temporary API key
usgs_api_key = api.login(str(authdat["username"]), str(authdat["password"]),
                         save=False, catalogId='EE')['data']

# Get list of scenes from Landsat 7 for 2000
lss_LC7_2000 = api.search(dataset='LANDSAT_ETM_C1', node='EE',
                          ll={"longitude": pelleextent_wgs84.XMin,
                              "latitude": pelleextent_wgs84.YMin},
                          ur={"longitude": pelleextent_wgs84.XMax,
                              "latitude": pelleextent_wgs84.YMax},
                          start_date='2000-01-01', end_date='2000-12-31',
                          api_key=usgs_api_key)

# Download and unzip landsat 7 scenes with least cloud cover for 2000
dl_landsattiles(lss=lss_LC7_2000, dataset='LANDSAT_ETM_C1', apikey=usgs_api_key,
                outdir=landsatdir, mincctile=True, maxcloudcover=100)
def main():
    API_KEY = custom_functions.get_API_key()
    global no_data
    _, _, _, DATA_DICT_PATH = custom_functions.get_updd()
    Path_i = 1
    Row_i = 1
    os.makedirs(os.path.dirname(DATA_DICT_PATH), exist_ok=True)
    try:
        with open(DATA_DICT_PATH, 'rb') as fp:
            no_data = pickle.load(fp)
        try:
            Path_i = sorted(no_data.keys())[-1]
            Row_i = no_data[Path_i][-1]
        except Exception as e:
            no_data = {}
            Path_i = 1
            Row_i = 1
    except FileNotFoundError as e:
        no_data = {}

    # in case Path_i and Row_i are taken from data_dict, they would be strings
    Path_i = int(Path_i)
    Row_i = int(Row_i)
    if Path_i > 1 or Row_i > 1:
        print(f'DATA_DICT already has data till Path {Path_i} and Row {Row_i}')

    dataset = 'LANDSAT_8_C1'
    for Path in range(Path_i, 234):
        for Row in range(1, 249):
            if (Path == Path_i) and (Row in range(1, Row_i + 1)):
                continue
            Path = str(Path).zfill(3)  # zero-pad to three digits, e.g. '1' -> '001'
            Row = str(Row).zfill(3)
            where = {20514: Path, 20516: Row}
            try:
                response = api.search(dataset=dataset, node='EE', where=where, api_key=API_KEY)
            except USGSError as e:
                if 'AUTH_UNAUTHORIZED' in str(e):
                    API_KEY = custom_functions.get_API_key()
                    response = api.search(dataset=dataset, node='EE', where=where, api_key=API_KEY)
                else:
                    custom_functions.record_error(
                        str(e) + f'\nERROR in create_dict.py\nAPI search error at Path {Path} and Row {Row}\n')
                    continue
            if response['errorCode'] is not None:
                custom_functions.record_error(
                    f'ERROR in create_dict.py\nerrorCode {response["errorCode"]} received during query at Path {Path} and Row {Row}\n')
            if response['data']['numberReturned'] == 0:
                continue
            no_data.setdefault(Path, [])
            no_data[Path].append(Row)
            print(f'Path {Path} and Row {Row} has some data')
            with open(DATA_DICT_PATH, 'wb') as fp:
                pickle.dump(no_data, fp)
def query(self, product_type=None, **kwargs):
    """Search for data on USGS catalogues

    .. versionchanged:: 1.0

        * ``product_type`` is no longer mandatory
    """
    product_type = kwargs.get("productType")
    if product_type is None:
        return [], 0
    api.login(
        self.config.credentials["username"],
        self.config.credentials["password"],
        save=True,
    )
    usgs_dataset = self.config.products[product_type]["dataset"]
    usgs_catalog_node = self.config.products[product_type]["catalog_node"]
    start_date = kwargs.pop("startTimeFromAscendingNode", None)
    end_date = kwargs.pop("completionTimeFromAscendingNode", None)
    footprint = kwargs.pop("geometry", None)

    # Configuration to generate the download url of search results
    result_summary_pattern = re.compile(
        r"^ID: .+, Acquisition Date: .+, Path: (?P<path>\d+), Row: (?P<row>\d+)$"  # noqa
    )
    # See https://pyformat.info/, on section "Padding and aligning strings" to
    # understand {path:0>3} and {row:0>3}.
    # It roughly means: 'if the string that will be passed as "path" has length < 3,
    # prepend as much "0"s as needed to reach length 3' and same for "row"
    dl_url_pattern = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz"

    final = []
    if footprint and len(footprint.keys()) == 4:  # a rectangle (or bbox)
        lower_left = {
            "longitude": footprint["lonmin"],
            "latitude": footprint["latmin"],
        }
        upper_right = {
            "longitude": footprint["lonmax"],
            "latitude": footprint["latmax"],
        }
    else:
        lower_left, upper_right = None, None
    try:
        results = api.search(
            usgs_dataset,
            usgs_catalog_node,
            start_date=start_date,
            end_date=end_date,
            ll=lower_left,
            ur=upper_right,
        )

        for result in results["data"]["results"]:
            r_lower_left = result["spatialFootprint"]["coordinates"][0][0]
            r_upper_right = result["spatialFootprint"]["coordinates"][0][2]
            summary_match = result_summary_pattern.match(result["summary"]).groupdict()
            result["geometry"] = geometry.box(r_lower_left[0], r_lower_left[1],
                                              r_upper_right[0], r_upper_right[1])

            # Same method as in base.py, Search.__init__()
            # Prepare the metadata mapping
            # Do a shallow copy, the structure is flat enough for this to be sufficient
            metas = DEFAULT_METADATA_MAPPING.copy()
            # Update the defaults with the mapping value. This will add any new key
            # added by the provider mapping that is not in the default metadata.
            # A deepcopy is done to prevent self.config.metadata_mapping from being modified when metas[metadata]
            # is a list and is modified
            metas.update(copy.deepcopy(self.config.metadata_mapping))
            metas = mtd_cfg_as_jsonpath(metas)

            result["productType"] = usgs_dataset

            product_properties = properties_from_json(result, metas)

            if getattr(self.config, "product_location_scheme", "https") == "file":
                product_properties["downloadLink"] = dl_url_pattern.format(base_url="file://")
            else:
                product_properties["downloadLink"] = dl_url_pattern.format(
                    base_url=self.config.google_base_url.rstrip("/"),
                    entity=result["entityId"],
                    **summary_match)

            final.append(
                EOProduct(
                    productType=product_type,
                    provider=self.provider,
                    properties=product_properties,
                    geometry=footprint,
                ))
    except USGSError as e:
        logger.debug(
            "Product type %s does not exist on catalogue %s",
            usgs_dataset,
            usgs_catalog_node,
        )
        logger.debug("Skipping error: %s", e)
    api.logout()
    return final, len(final)
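# Small worked example (made-up values) of the zero-padding described in the
# comment above: '{path:0>3}' left-pads to width 3 with '0', so path=33 and
# row=7 become '033' and '007' in the generated download URL.
example = "{base_url}/L8/{path:0>3}/{row:0>3}/{entity}.tar.bz".format(
    base_url="https://example-bucket", path=33, row=7, entity="LC80330072017104LGN00")
assert example == "https://example-bucket/L8/033/007/LC80330072017104LGN00.tar.bz"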
from usgs import api
import json
import os
import sys

usgs_api_key = api.login(os.environ["EARTH_EXPLORER_USERNAME"],
                         os.environ["EARTH_EXPLORER_PASSWORD"])

scenes = api.search(
    'EO1_HYP_PUB',
    'EE',
    api_key=usgs_api_key,
    start_date='2017-02-01',
    end_date='2017-03-01',
    extended=True)

with open(sys.argv[1], "w") as f:
    f.write(json.dumps(scenes))
]

import numpy as np

start_dates = [str(i) + "-06-01" for i in np.arange(2002, 2020)]
end_dates = [str(i) + "-09-30" for i in np.arange(2002, 2020)]

all_results = []
for start_date, end_date in zip(start_dates, end_dates):
    results = api.search(dataset="ARD_TILE", node="EE",
                         start_date=start_date, end_date=end_date,
                         ll={"longitude": -104.1064453125,
                             "latitude": 39.825413103424786},
                         ur={"longitude": -95.130615234375,
                             "latitude": 43.100982876188546},
                         api_key=token)
    all_results.extend(results['data']['results'])
    print(start_date + " done")

filtered_results = []
for i in all_results:
    if "landsat_etm_c1" in i['browseUrl']:
        pass
    else:
        if i['cloudCover'] == None:
def main(argsIn): #pylint: disable=R0914,R0912

    try:
        usage = "usage: fetch_hdds_images.py [options]"
        parser = argparse.ArgumentParser(usage=usage)

        parser.add_argument("--output-folder", dest="output_folder", required=True,
                            help="Download files to this folder.")

        parser.add_argument("--user", dest="user", required=True,
                            help="User name for EarthExplorer website.")
        parser.add_argument("--password", dest="password", required=True,
                            help="Password name for EarthExplorer website.")

        parser.add_argument("--force-login", action="store_true",
                            dest="force_login", default=False,
                            help="Don't reuse the cached EE API key if present.")

        parser.add_argument("--refetch-datasets", action="store_true",
                            dest="refetch_datasets", default=False,
                            help="Force a refetch of the dataset list.")

        parser.add_argument("--refetch-scenes", action="store_true",
                            dest="refetch_scenes", default=False,
                            help="Force refetches of scene lists for each dataset.")

        parser.add_argument("--image-list-path", dest="image_list_path", default=None,
                            help="Path to text file containing list of image IDs to download, one per line.")

        parser.add_argument("--event-name", dest="event_name", default=None,
                            help="Only download images from this event.")

        options = parser.parse_args(argsIn)

    except argparse.ArgumentError:
        print(usage)
        return -1

    if options.output_folder and not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)

    images_to_use = []
    if options.image_list_path:
        with open(options.image_list_path, 'r') as f:
            for line in f:
                images_to_use.append(line.strip())

    # Only log in if our session expired (ugly function use to check!)
    if options.force_login or (not api._get_api_key(None)): #pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        api.login(options.user, options.password, save=True, catalogId=CATALOG) #pylint: disable=W0612

        print(api._get_api_key(None)) #pylint: disable=W0212
        raise Exception('DEBUG')

    # Retrieve all of the available datasets
    dataset_list = get_dataset_list(options)

    print('Found ' + str(len(dataset_list)) + ' useable datasets.')
    #raise Exception('debug')

    # Don't think we need to do this!
    #get_dataset_fields(dataset_list)

    # TODO: Work through some errors.
    counter = 0
    for (dataset, full_name) in dataset_list:
        counter = counter + 1
        #if counter == 1:
        #    continue

        if options.event_name: # Only download images from the specified event
            if options.event_name.lower() not in full_name.lower():
                continue

        dataset_folder = os.path.join(options.output_folder, full_name)
        scene_list_path = os.path.join(dataset_folder, 'scene_list.dat')
        done_flag_path = os.path.join(dataset_folder, 'done.flag')

        if not os.path.exists(dataset_folder):
            os.mkdir(dataset_folder)

        if os.path.exists(done_flag_path) and not options.refetch_scenes:
            print('Skipping completed dataset ' + full_name)
            continue

        print('--> Search scenes for: ' + full_name)

        BATCH_SIZE = 10000
        if not os.path.exists(scene_list_path) or options.refetch_scenes:
            # Request the scene list from USGS
            #details = {'Agency - Platform - Vendor':'WORLDVIEW', 'Sensor Type':'MS'}
            #details = {'sensor_type':'MS'}
            details = {}  # TODO: How do these work??
            # Large sets of results require multiple queries in order to get all of the data
            done = False
            error = False
            all_scenes = []  # Acculumate all scene data here
            while not done:

                print('Searching with start offset = ' + str(len(all_scenes)))
                results = api.search(dataset, CATALOG, where=details,
                                     max_results=BATCH_SIZE,
                                     starting_number=len(all_scenes), extended=False)

                if 'results' not in results['data']:
                    print('ERROR: Failed to get any results for dataset: ' + full_name)
                    error = True
                    break
                if len(results['data']['results']) < BATCH_SIZE:
                    done = True
                all_scenes += results['data']['results']

            if error:
                continue

            results['data']['results'] = all_scenes

            # Cache the results to disk
            with open(scene_list_path, 'wb') as f:
                pickle.dump(results, f)

        else:
            # Load the results from the cache file
            with open(scene_list_path, 'rb') as f:
                results = pickle.load(f)

        print('Got ' + str(len(results['data']['results'])) + ' scene results.')

        for scene in results['data']['results']:

            fail = False
            REQUIRED_PARTS = ['displayId', 'summary', 'entityId', 'displayId']
            for p in REQUIRED_PARTS:
                if (p not in scene) or (not scene[p]):
                    print('scene object is missing element: ' + p)
                    print(scene)
                    fail = True
            if fail:
                continue

            # If image list was provided skip other image names
            if images_to_use and (scene['displayId'] not in images_to_use):
                continue

            # Figure out the downloaded file path for this image
            file_name = scene['displayId'] + '.zip'
            output_path = os.path.join(dataset_folder, file_name)
            if not os.path.exists(dataset_folder):
                os.mkdir(dataset_folder)
            if os.path.exists(output_path):
                continue  # Already have the file!

            # Check if this is one of the sensors we are interested in.
            DESIRED_SENSORS = [('worldview', 'hp'), ('worldview', 'msi')]  # TODO: Add more
            parts = scene['summary'].lower().split(',')
            platform = None
            sensor = None
            for part in parts:
                if 'platform:' in part:
                    platform = part.split(':')[1].strip()
                if 'sensor:' in part:
                    sensor = part.split(':')[1].strip()
            if (not platform) or (not sensor):
                raise Exception('Unknown sensor: ' + scene['summary'])
            if (platform, sensor) not in DESIRED_SENSORS:
                print((platform, sensor))
                print('Undesired sensor: ' + scene['summary'])
                continue

            # Investigate the number of bands
            PLATFORM_BAND_COUNTS = {'worldview': 8, 'TODO': 1}
            min_num_bands = PLATFORM_BAND_COUNTS[platform]
            num_bands = None
            try:
                meta = api.metadata(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error fetching metadata for dataset = ' + dataset +
                      ', entity = ' + scene['entityId'])
                continue
            try:
                for m in meta['data'][0]['metadataFields']:
                    if m['fieldName'] == 'Number of bands':
                        num_bands = int(m['value'])
                        break
                if not num_bands:
                    raise KeyError()  # Treat like the except case
                if num_bands < min_num_bands:
                    print('Skipping %s, too few bands: %d' % (scene['displayId'], num_bands))
                    continue
            except KeyError:
                print('Unable to perform metadata check!')
                print(meta)

            # Make sure we know which file option to download
            try:
                types = api.download_options(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error decoding download options!')
                continue

            if not types['data'] or not types['data'][0]:
                raise Exception('Need to handle types: ' + str(types))
            ready = False
            download_type = 'STANDARD'  # TODO: Does this ever change?
            for o in types['data'][0]['downloadOptions']:
                if o['available'] and o['downloadCode'] == download_type:
                    ready = True
                    break
            if not ready:
                raise Exception('Missing download option for scene: ' + str(types))

            # Get the download URL of the file we want.
            r = api.download(dataset, CATALOG, [scene['entityId']], product=download_type)
            try:
                url = r['data'][0]['url']
            except Exception as e:
                raise Exception('Failed to get download URL from result: ' + str(r)) from e
            print(scene['summary'])

            # Finally download the data!
            cmd = ('wget "%s" --user %s --password %s -O %s' %
                   (url, options.user, options.password, output_path))
            print(cmd)
            os.system(cmd)

            #raise Exception('DEBUG')

        print('Finished processing dataset: ' + full_name)
        os.system('touch ' + done_flag_path)  # Mark this dataset as finished
        #raise Exception('DEBUG')

    #if not os.path.exists(output_path):
    #    raise Exception('Failed to download file ' + output_path)

    print('Finished downloading HDDS! files.')

    # Can just let this time out
    #api.logout()

    return 0
from usgs import api
import json

results = api.search('LANDSAT_TM', 'EE', start_date='2011-06-01', where={3653: 0}, max_results=1000)
print(json.dumps(results))
def geturls():
    dataset = 'LANDSAT_8_C1'
    global DATA_DICT_PATH
    global DOWNLOAD_PATH
    global PATH_S
    global ROW_S
    global LAST_PATH
    global LAST_ROW

    with open(DATA_DICT_PATH, 'rb') as fp:
        data_dict = pickle.load(fp)

    API_KEY = custom_functions.get_API_key()
    LAST_PATH = list(data_dict.keys())[-1]
    LAST_ROW = data_dict[LAST_PATH][-1]

    for Path in data_dict:
        # RESUME DOWNLOADING CODE
        if (PATH_S != ''):
            if int(Path) < int(PATH_S):
                continue
        for Row in data_dict[Path]:
            # RESUME DOWNLOADING CODE
            if (ROW_S != ''):
                if int(Row) < int(ROW_S):
                    continue
            where = {20514: Path, 20516: Row}

            # MAKING QUERY REQUEST TO USGS API
            try:
                response = api.search(dataset=dataset, node='EE', where=where, api_key=API_KEY)
            except USGSError as e:
                if 'AUTH_UNAUTHORIZED' in str(e):
                    API_KEY = custom_functions.get_API_key()
                    response = api.search(dataset=dataset, node='EE', where=where, api_key=API_KEY)
                else:
                    custom_functions.record_error(
                        str(e) + f'Error in downloader.py\nAPI search error at Path {Path} Row {Row}')
                    continue

            if response['data']['numberReturned'] == 0:
                continue

            # FINDING OUT THE LATEST DATASET AVAILABLE FOR THE PATH-ROW COMBINATION
            date = datetime.strptime('1957-10-03', '%Y-%m-%d')
            for i in response['data']['results']:
                currdate = datetime.strptime(i['acquisitionDate'], '%Y-%m-%d')
                if currdate > date:
                    date = currdate
                    displayId = i['displayId']
                    downloadUrl = i['downloadUrl']
                    catalog_id = response['catalog_id']
                    entityId = i['entityId']

            yield (displayId, Path, Row, downloadUrl, catalog_id, entityId)