def create_snapshots():
    """Run requests against USGS API and save the responses for use in tests."""
    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])

    # Each entry pairs a snapshot filename with the request that produces it.
    # Requests are issued in this exact order.
    snapshot_requests = [
        ('dataset-fields.json',
         lambda: api.dataset_fields("LANDSAT_8_C1", "EE", api_key=api_key)),
        ('datasets.json',
         lambda: api.datasets(None, "EE")),
        ('download.json',
         lambda: api.download("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"],
                              product='STANDARD')),
        ('download-options.json',
         lambda: api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])),
        ('metadata.json',
         lambda: api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])),
        ('search.json',
         lambda: api.search("LANDSAT_8_C1", "EE", start_date='20170401',
                            end_date='20170402', max_results=10)),
    ]
    for filename, make_request in snapshot_requests:
        write_response(make_request(), filename)

    api.logout(api_key)
def test_download_options():
    """Every download-options record must expose the full M2M key set."""
    required = (
        "id", "displayId", "entityId", "datasetId", "available", "filesize",
        "productName", "productCode", "bulkAvailable", "downloadSystem",
        "secondaryDownloads",
    )
    response = api.download_options("LANDSAT_8_C1", ["LC82260782020217LGN00"])
    assert check_root_keys(response)
    for record in response["data"]:
        # Collect any missing keys so a failure is easy to diagnose.
        missing = [key for key in required if key not in record]
        assert not missing
def test_download_options():
    """Each downloadOptions entry of the first result carries all expected fields."""
    required = ("available", "storageLocation", "url", "productName",
                "filesize", "downloadCode")
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    assert check_root_keys(response)
    first_result = response["data"][0]
    for option in first_result["downloadOptions"]:
        for field in required:
            # .get() treats both absent keys and explicit None as failures.
            assert option.get(field) is not None
def test_download_options():
    """Download options for a known Landsat 8 scene expose the expected fields."""
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    assert check_root_keys(response)
    options = response["data"][0]["downloadOptions"]
    for entry in options:
        # Fields must be present AND non-None for the response to be usable.
        assert entry.get("available") is not None
        assert entry.get("storageLocation") is not None
        assert entry.get("url") is not None
        assert entry.get("productName") is not None
        assert entry.get("filesize") is not None
        assert entry.get("downloadCode") is not None
def create_snapshots():
    """Run requests against USGS API for use in tests.

    Credentials come from the USGS_USERNAME / USGS_PASSWORD environment
    variables; each response is written out as a JSON snapshot file.
    """
    key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])

    # Dataset Fields
    write_response(api.dataset_fields("LANDSAT_8_C1", "EE", api_key=key),
                   'dataset-fields.json')

    # Datasets
    write_response(api.datasets(None, "EE"), 'datasets.json')

    # Download
    write_response(api.download("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"],
                                product='STANDARD'),
                   'download.json')

    # Download Options
    write_response(api.download_options("LANDSAT_8_C1", "EE",
                                        ["LC80810712017104LGN00"]),
                   'download-options.json')

    # Metadata
    write_response(api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"]),
                   'metadata.json')

    # Search
    write_response(api.search("LANDSAT_8_C1", "EE", start_date='20170401',
                              end_date='20170402', max_results=10),
                   'search.json')

    api.logout(key)
def download(self, product, auth=None, progress_callback=None, **kwargs):
    """Download data from USGS catalogues.

    :param product: the EO product to download; its ``properties`` must carry
        ``productType``, ``id`` and ``downloadLink``.
    :param auth: unused here — authentication is done via the plugin's
        configured USGS credentials.
    :param progress_callback: optional progress reporter; a default one is
        created when None.
    :param kwargs: forwarded to ``_prepare_download`` / ``_finalize``.
    :returns: local path of the downloaded (and possibly renamed) product,
        or the already-existing path when nothing needs downloading.
    :raises AuthenticationError: when USGS login fails.
    :raises NotAvailableError: when no matching product/URL is found.
    """
    fs_path, record_filename = self._prepare_download(
        product, outputs_extension=".tar.gz", **kwargs)
    # Either path empty means the product is already downloaded (or cannot
    # be prepared): short-circuit with whatever _prepare_download returned.
    if not fs_path or not record_filename:
        return fs_path
    # progress bar init
    if progress_callback is None:
        progress_callback = get_progress_callback()
    progress_callback.desc = product.properties.get("id", "")
    progress_callback.position = 1
    try:
        api.login(
            self.config.credentials["username"],
            self.config.credentials["password"],
            save=True,
        )
    except USGSError:
        # from None: hide the USGSError chain so credentials never leak
        # into the traceback.
        raise AuthenticationError(
            "Please check your USGS credentials.") from None
    download_options = api.download_options(
        product.properties["productType"], product.properties["id"])
    try:
        # Keep only products served by the "dds" download system.
        product_ids = [
            p["id"] for p in download_options["data"]
            if p["downloadSystem"] == "dds"
        ]
    except KeyError as e:
        raise NotAvailableError("%s not found in %s's products"
                                % (e, product.properties["id"]))
    if not product_ids:
        raise NotAvailableError("No USGS products found for %s"
                                % product.properties["id"])
    # Ask USGS to stage each candidate product and collect the URLs of the
    # downloads being prepared.
    req_urls = []
    for product_id in product_ids:
        download_request = api.download_request(
            product.properties["productType"], product.properties["id"],
            product_id)
        try:
            req_urls.extend([
                x["url"]
                for x in download_request["data"]["preparingDownloads"]
            ])
        except KeyError as e:
            raise NotAvailableError("%s not found in %s download_request"
                                    % (e, product.properties["id"]))
    # Only the first staged URL is used; extra ones are merely logged.
    if len(req_urls) > 1:
        logger.warning("%s usgs products found for %s. Only first will be downloaded"
                       % (len(req_urls), product.properties["id"]))
    elif not req_urls:
        raise NotAvailableError("No usgs request url was found for %s"
                                % product.properties["id"])
    req_url = req_urls[0]
    progress_callback.reset()
    with requests.get(
        req_url,
        stream=True,
    ) as stream:
        try:
            stream.raise_for_status()
        except HTTPError:
            # Log and fall through: HTTP errors are reported but do not
            # abort the method here.
            import traceback as tb
            logger.error(
                "Error while getting resource :\n%s",
                tb.format_exc(),
            )
        else:
            # Stream the payload to disk in 64 KiB chunks, driving the
            # progress callback with the content-length (0 if unknown).
            stream_size = int(stream.headers.get("content-length", 0))
            progress_callback.max_size = stream_size
            progress_callback.reset()
            with open(fs_path, "wb") as fhandle:
                for chunk in stream.iter_content(chunk_size=64 * 1024):
                    if chunk:
                        fhandle.write(chunk)
                        progress_callback(len(chunk), stream_size)
    # Record which link was downloaded so later runs can skip it.
    with open(record_filename, "w") as fh:
        fh.write(product.properties["downloadLink"])
    logger.debug("Download recorded in %s", record_filename)
    api.logout()
    # Check that the downloaded file is really a tar file
    if not tarfile.is_tarfile(fs_path):
        logger.warning(
            "Downloaded product is not a tar File. Please check its file type before using it"
        )
        # Strip the ".tar.gz" suffix since the content is not an archive.
        new_fs_path = fs_path[:fs_path.index(".tar.gz")]
        shutil.move(fs_path, new_fs_path)
        return new_fs_path
    return self._finalize(fs_path, outputs_extension=".tar.gz", **kwargs)
def main(argsIn): #pylint: disable=R0914,R0912
    """Download HDDS imagery from USGS EarthExplorer.

    Parses command-line options, logs in to the USGS API, enumerates the
    available datasets, and for each dataset fetches (and caches) the scene
    list, then downloads every scene that passes the sensor/band filters.

    :param argsIn: list of command-line arguments (e.g. ``sys.argv[1:]``).
    :returns: 0 on success, -1 on argument-parsing failure.
    """
    try:
        usage = "usage: fetch_hdds_images.py [options]"
        parser = argparse.ArgumentParser(usage=usage)
        parser.add_argument("--output-folder", dest="output_folder", required=True,
                            help="Download files to this folder.")
        parser.add_argument("--user", dest="user", required=True,
                            help="User name for EarthExplorer website.")
        parser.add_argument("--password", dest="password", required=True,
                            help="Password name for EarthExplorer website.")
        parser.add_argument("--force-login", action="store_true",
                            dest="force_login", default=False,
                            help="Don't reuse the cached EE API key if present.")
        parser.add_argument("--refetch-datasets", action="store_true",
                            dest="refetch_datasets", default=False,
                            help="Force a refetch of the dataset list.")
        parser.add_argument("--refetch-scenes", action="store_true",
                            dest="refetch_scenes", default=False,
                            help="Force refetches of scene lists for each dataset.")
        parser.add_argument("--image-list-path", dest="image_list_path",
                            default=None,
                            help="Path to text file containing list of image IDs "
                                 "to download, one per line.")
        parser.add_argument("--event-name", dest="event_name", default=None,
                            help="Only download images from this event.")
        options = parser.parse_args(argsIn)
    except argparse.ArgumentError:
        # NOTE(review): argparse normally raises SystemExit on bad input, so
        # this branch is rarely reached; kept to preserve the original contract.
        print(usage)
        return -1

    if options.output_folder and not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)

    # Optional whitelist of image display IDs, one per line.
    images_to_use = []
    if options.image_list_path:
        with open(options.image_list_path, 'r') as f:
            for line in f:
                images_to_use.append(line.strip())

    # Only log in if our session expired (ugly function use to check!)
    # FIX: removed leftover `raise Exception('DEBUG')` and the debug print of
    # the API key, which aborted the script right after login.
    if options.force_login or (not api._get_api_key(None)): #pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        api.login(options.user, options.password,
                  save=True, catalogId=CATALOG) #pylint: disable=W0612

    # Retrieve all of the available datasets
    dataset_list = get_dataset_list(options)
    print('Found ' + str(len(dataset_list)) + ' useable datasets.')

    # TODO: Work through some errors.
    for (dataset, full_name) in dataset_list:

        if options.event_name:  # Only download images from the specified event
            if options.event_name.lower() not in full_name.lower():
                continue

        dataset_folder = os.path.join(options.output_folder, full_name)
        scene_list_path = os.path.join(dataset_folder, 'scene_list.dat')
        done_flag_path = os.path.join(dataset_folder, 'done.flag')

        if not os.path.exists(dataset_folder):
            os.mkdir(dataset_folder)

        if os.path.exists(done_flag_path) and not options.refetch_scenes:
            print('Skipping completed dataset ' + full_name)
            continue

        print('--> Search scenes for: ' + full_name)

        BATCH_SIZE = 10000
        if not os.path.exists(scene_list_path) or options.refetch_scenes:
            # Request the scene list from USGS
            details = {}  # TODO: How do these work??

            # Large sets of results require multiple queries in order to
            # get all of the data.
            done = False
            error = False
            all_scenes = []  # Accumulate all scene data here
            while not done:
                print('Searching with start offset = ' + str(len(all_scenes)))
                results = api.search(dataset, CATALOG, where=details,
                                     max_results=BATCH_SIZE,
                                     starting_number=len(all_scenes),
                                     extended=False)
                if 'results' not in results['data']:
                    print('ERROR: Failed to get any results for dataset: '
                          + full_name)
                    error = True
                    break
                if len(results['data']['results']) < BATCH_SIZE:
                    done = True
                all_scenes += results['data']['results']
            if error:
                continue
            results['data']['results'] = all_scenes

            # Cache the results to disk
            with open(scene_list_path, 'wb') as f:
                pickle.dump(results, f)
        else:
            # Load the results from the cache file
            with open(scene_list_path, 'rb') as f:
                results = pickle.load(f)

        print('Got ' + str(len(results['data']['results'])) + ' scene results.')

        for scene in results['data']['results']:

            # Skip scenes missing any of the fields we rely on below.
            # FIX: removed duplicate 'displayId' entry.
            fail = False
            REQUIRED_PARTS = ['displayId', 'summary', 'entityId']
            for p in REQUIRED_PARTS:
                if (p not in scene) or (not scene[p]):
                    print('scene object is missing element: ' + p)
                    print(scene)
                    fail = True
            if fail:
                continue

            # If image list was provided skip other image names
            if images_to_use and (scene['displayId'] not in images_to_use):
                continue

            # Figure out the downloaded file path for this image
            file_name = scene['displayId'] + '.zip'
            output_path = os.path.join(dataset_folder, file_name)
            if not os.path.exists(dataset_folder):
                os.mkdir(dataset_folder)
            if os.path.exists(output_path):
                continue  # Already have the file!

            # Check if this is one of the sensors we are interested in.
            DESIRED_SENSORS = [('worldview', 'hp'),
                               ('worldview', 'msi')]  # TODO: Add more
            parts = scene['summary'].lower().split(',')
            platform = None
            sensor = None
            for part in parts:
                if 'platform:' in part:
                    platform = part.split(':')[1].strip()
                if 'sensor:' in part:
                    sensor = part.split(':')[1].strip()
            if (not platform) or (not sensor):
                raise Exception('Unknown sensor: ' + scene['summary'])
            if (platform, sensor) not in DESIRED_SENSORS:
                print((platform, sensor))
                print('Undesired sensor: ' + scene['summary'])
                continue

            # Investigate the number of bands
            PLATFORM_BAND_COUNTS = {'worldview': 8, 'TODO': 1}
            min_num_bands = PLATFORM_BAND_COUNTS[platform]
            num_bands = None
            try:
                meta = api.metadata(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error fetching metadata for dataset = ' + dataset
                      + ', entity = ' + scene['entityId'])
                continue
            try:
                for m in meta['data'][0]['metadataFields']:
                    if m['fieldName'] == 'Number of bands':
                        num_bands = int(m['value'])
                        break
                if not num_bands:
                    raise KeyError()  # Treat like the except case
                if num_bands < min_num_bands:
                    print('Skipping %s, too few bands: %d'
                          % (scene['displayId'], num_bands))
                    continue
            except KeyError:
                # Best-effort check: proceed with the download anyway.
                print('Unable to perform metadata check!')
                print(meta)

            # Make sure we know which file option to download
            try:
                types = api.download_options(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error decoding download options!')
                continue

            if not types['data'] or not types['data'][0]:
                raise Exception('Need to handle types: ' + str(types))
            ready = False
            download_type = 'STANDARD'  # TODO: Does this ever change?
            for o in types['data'][0]['downloadOptions']:
                if o['available'] and o['downloadCode'] == download_type:
                    ready = True
                    break
            if not ready:
                raise Exception('Missing download option for scene: '
                                + str(types))

            # Get the download URL of the file we want.
            r = api.download(dataset, CATALOG, [scene['entityId']],
                             product=download_type)
            try:
                url = r['data'][0]['url']
            except Exception as e:
                raise Exception('Failed to get download URL from result: '
                                + str(r)) from e
            print(scene['summary'])

            # Finally download the data!
            cmd = ('wget "%s" --user %s --password %s -O %s'
                   % (url, options.user, options.password, output_path))
            print(cmd)
            os.system(cmd)

        print('Finished processing dataset: ' + full_name)
        os.system('touch ' + done_flag_path)  # Mark this dataset as finished

    print('Finished downloading HDDS! files.')
    # Can just let this time out
    #api.logout()
    return 0
def download_options(dataset, scene_ids, node, api_key):
    """Print the USGS download options for *scene_ids* as a JSON document."""
    resolved_node = get_node(dataset, node)
    payload = api.download_options(dataset, resolved_node, scene_ids)
    print(json.dumps(payload))
def download_options(dataset, scene_ids, api_key):
    """Echo the USGS download options for *scene_ids* as a JSON document."""
    response = api.download_options(dataset, scene_ids)
    serialized = json.dumps(response)
    click.echo(serialized)
def download_options(dataset, scene_ids, node, api_key):
    """Look up and print (as JSON) the download options for the given scenes."""
    data = api.download_options(dataset, get_node(dataset, node), scene_ids)
    print(json.dumps(data))