Example #1
def create_snapshots():
    """
    Run requests against USGS API for use in tests.
    """

    api_key = api.login(os.environ['USGS_USERNAME'], os.environ['USGS_PASSWORD'])

    # Dataset Fields
    response = api.dataset_fields("LANDSAT_8_C1", "EE", api_key=api_key)
    write_response(response, 'dataset-fields.json')

    # Datasets
    response = api.datasets(None, "EE")
    write_response(response, 'datasets.json')

    # Download
    response = api.download("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"], product='STANDARD')
    write_response(response, 'download.json')

    # Download Options
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'download-options.json')

    # Metadata
    response = api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'metadata.json')

    # Search
    response = api.search("LANDSAT_8_C1", "EE", start_date='20170401', end_date='20170402', max_results=10)
    write_response(response, 'search.json')

    api.logout(api_key)
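
The snapshot script above relies on a write_response helper that is not shown in the snippet. A minimal sketch of what such a helper might look like, assuming the snapshots are written into a TEST_DATA_DIR directory (the helper body and the directory name are assumptions, not part of the original code):

import json
import os

# Hypothetical location for the JSON snapshots; adjust to the actual test layout.
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')


def write_response(response, filename):
    """Serialize an API response dict to a JSON snapshot file (assumed behavior)."""
    path = os.path.join(TEST_DATA_DIR, filename)
    with open(path, 'w') as f:
        json.dump(response, f, indent=4)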
Example #2
def test_download_options():

    expected_keys = ["id", "displayId", "entityId", "datasetId", "available", "filesize", "productName", "productCode", "bulkAvailable", "downloadSystem", "secondaryDownloads"]
    response = api.download_options("LANDSAT_8_C1", ["LC82260782020217LGN00"])

    assert check_root_keys(response)

    for item in response["data"]:
        for key in expected_keys:
            assert key in item
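
The test above, like the variants in the following examples, calls a check_root_keys helper that is not included in the snippets. A minimal sketch, assuming it only verifies that the standard USGS machine-to-machine response envelope is present (the exact key list is an assumption and may differ from the project's real helper):

def check_root_keys(response):
    """Hypothetical helper: confirm the top-level response envelope keys exist."""
    root_keys = ["requestId", "version", "data", "errorCode", "errorMessage"]
    return all(key in response for key in root_keys)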
Example #3
def test_download_options():

    expected_keys = ["available", "storageLocation", "url", "productName", "filesize", "downloadCode"]
    response = api.download_options("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])

    assert check_root_keys(response)

    for item in response["data"][0]["downloadOptions"]:
        for key in expected_keys:
            assert item.get(key) is not None
Example #4
def test_download_options():

    expected_keys = [
        "available", "storageLocation", "url", "productName", "filesize",
        "downloadCode"
    ]
    response = api.download_options("LANDSAT_8_C1", "EE",
                                    ["LC80810712017104LGN00"])

    assert check_root_keys(response)

    for item in response["data"][0]["downloadOptions"]:
        for key in expected_keys:
            assert item.get(key) is not None
Example #5
def create_snapshots():
    """
    Run requests against USGS API for use in tests.
    """

    api_key = api.login(os.environ['USGS_USERNAME'],
                        os.environ['USGS_PASSWORD'])

    # Dataset Fields
    response = api.dataset_fields("LANDSAT_8_C1", "EE", api_key=api_key)
    write_response(response, 'dataset-fields.json')

    # Datasets
    response = api.datasets(None, "EE")
    write_response(response, 'datasets.json')

    # Download
    response = api.download("LANDSAT_8_C1",
                            "EE", ["LC80810712017104LGN00"],
                            product='STANDARD')
    write_response(response, 'download.json')

    # Download Options
    response = api.download_options("LANDSAT_8_C1", "EE",
                                    ["LC80810712017104LGN00"])
    write_response(response, 'download-options.json')

    # Metadata
    response = api.metadata("LANDSAT_8_C1", "EE", ["LC80810712017104LGN00"])
    write_response(response, 'metadata.json')

    # Search
    response = api.search("LANDSAT_8_C1",
                          "EE",
                          start_date='20170401',
                          end_date='20170402',
                          max_results=10)
    write_response(response, 'search.json')

    api.logout(api_key)
Example #6
    def download(self, product, auth=None, progress_callback=None, **kwargs):
        """Download data from USGS catalogues"""

        fs_path, record_filename = self._prepare_download(
            product, outputs_extension=".tar.gz", **kwargs)
        if not fs_path or not record_filename:
            return fs_path

        # progress bar init
        if progress_callback is None:
            progress_callback = get_progress_callback()
        progress_callback.desc = product.properties.get("id", "")
        progress_callback.position = 1

        try:
            api.login(
                self.config.credentials["username"],
                self.config.credentials["password"],
                save=True,
            )
        except USGSError:
            raise AuthenticationError(
                "Please check your USGS credentials.") from None

        download_options = api.download_options(
            product.properties["productType"], product.properties["id"])

        try:
            product_ids = [
                p["id"] for p in download_options["data"]
                if p["downloadSystem"] == "dds"
            ]
        except KeyError as e:
            raise NotAvailableError("%s not found in %s's products" %
                                    (e, product.properties["id"]))

        if not product_ids:
            raise NotAvailableError("No USGS products found for %s" %
                                    product.properties["id"])

        req_urls = []
        for product_id in product_ids:
            download_request = api.download_request(
                product.properties["productType"], product.properties["id"],
                product_id)
            try:
                req_urls.extend([
                    x["url"]
                    for x in download_request["data"]["preparingDownloads"]
                ])
            except KeyError as e:
                raise NotAvailableError("%s not found in %s download_request" %
                                        (e, product.properties["id"]))

        if len(req_urls) > 1:
            logger.warning(
                "%s usgs products found for %s. Only first will be downloaded"
                % (len(req_urls), product.properties["id"]))
        elif not req_urls:
            raise NotAvailableError("No usgs request url was found for %s" %
                                    product.properties["id"])

        req_url = req_urls[0]
        progress_callback.reset()
        with requests.get(
                req_url,
                stream=True,
        ) as stream:
            try:
                stream.raise_for_status()
            except HTTPError:
                import traceback as tb

                logger.error(
                    "Error while getting resource :\n%s",
                    tb.format_exc(),
                )
            else:
                stream_size = int(stream.headers.get("content-length", 0))
                progress_callback.max_size = stream_size
                progress_callback.reset()
                with open(fs_path, "wb") as fhandle:
                    for chunk in stream.iter_content(chunk_size=64 * 1024):
                        if chunk:
                            fhandle.write(chunk)
                            progress_callback(len(chunk), stream_size)

        with open(record_filename, "w") as fh:
            fh.write(product.properties["downloadLink"])
        logger.debug("Download recorded in %s", record_filename)

        api.logout()

        # Check that the downloaded file is really a tar file
        if not tarfile.is_tarfile(fs_path):
            logger.warning(
                "Downloaded product is not a tar File. Please check its file type before using it"
            )
            new_fs_path = fs_path[:fs_path.index(".tar.gz")]
            shutil.move(fs_path, new_fs_path)
            return new_fs_path
        return self._finalize(fs_path, outputs_extension=".tar.gz", **kwargs)
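
Stripped of the eodag plumbing, the download flow in Example #6 amounts to: log in, list the download options for the scene, keep the products served by the "dds" download system, request a URL for one of them, and stream the archive to disk. A condensed sketch of that flow, mirroring the calls above (the dataset name, scene id and output path are placeholders, and error handling and progress reporting are omitted):

import requests
from usgs import api

# Placeholder inputs for illustration; in the plugin these come from the EO product.
dataset = "landsat_ot_c2_l1"
entity_id = "LC80810712017104LGN00"
out_path = "scene.tar.gz"

api.login("username", "password", save=True)

# List downloadable products for the scene and keep the ones served by "dds".
options = api.download_options(dataset, entity_id)
product_ids = [p["id"] for p in options["data"] if p["downloadSystem"] == "dds"]

# Ask for a download URL for the first matching product.
request = api.download_request(dataset, entity_id, product_ids[0])
url = request["data"]["preparingDownloads"][0]["url"]

# Stream the archive to disk in 64 KiB chunks.
with requests.get(url, stream=True) as stream:
    stream.raise_for_status()
    with open(out_path, "wb") as fh:
        for chunk in stream.iter_content(chunk_size=64 * 1024):
            if chunk:
                fh.write(chunk)

api.logout()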
Example #7
def main(argsIn):  #pylint: disable=R0914,R0912

    try:

        usage = "usage: fetch_hdds_images.py [options]"
        parser = argparse.ArgumentParser(usage=usage)

        parser.add_argument("--output-folder",
                            dest="output_folder",
                            required=True,
                            help="Download files to this folder.")

        parser.add_argument("--user",
                            dest="user",
                            required=True,
                            help="User name for EarthExplorer website.")
        parser.add_argument("--password",
                            dest="password",
                            required=True,
                            help="Password name for EarthExplorer website.")

        parser.add_argument(
            "--force-login",
            action="store_true",
            dest="force_login",
            default=False,
            help="Don't reuse the cached EE API key if present.")

        parser.add_argument("--refetch-datasets",
                            action="store_true",
                            dest="refetch_datasets",
                            default=False,
                            help="Force a refetch of the dataset list.")

        parser.add_argument(
            "--refetch-scenes",
            action="store_true",
            dest="refetch_scenes",
            default=False,
            help="Force refetches of scene lists for each dataset.")

        parser.add_argument(
            "--image-list-path",
            dest="image_list_path",
            default=None,
            help=
            "Path to text file containing list of image IDs to download, one per line."
        )

        parser.add_argument("--event-name",
                            dest="event_name",
                            default=None,
                            help="Only download images from this event.")

        options = parser.parse_args(argsIn)

    except argparse.ArgumentError:
        print(usage)
        return -1

    if options.output_folder and not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)

    images_to_use = []
    if options.image_list_path:
        with open(options.image_list_path, 'r') as f:
            for line in f:
                images_to_use.append(line.strip())

    # Only log in if our session expired (ugly function use to check!)
    if options.force_login or (not api._get_api_key(None)):  #pylint: disable=W0212
        print('Logging in to USGS EarthExplorer...')
        api.login(options.user, options.password, save=True, catalogId=CATALOG)  #pylint: disable=W0612

        # Debug leftovers disabled so the script can continue after logging in.
        #print(api._get_api_key(None))  #pylint: disable=W0212
        #raise Exception('DEBUG')

    # Retrieve all of the available datasets
    dataset_list = get_dataset_list(options)

    print('Found ' + str(len(dataset_list)) + ' usable datasets.')
    #raise Exception('debug')

    # Don't think we need to do this!
    #get_dataset_fields(dataset_list)

    # TODO: Work through some errors.
    counter = 0
    for (dataset, full_name) in dataset_list:
        counter = counter + 1
        #if counter == 1:
        #    continue

        if options.event_name:  # Only download images from the specified event
            if options.event_name.lower() not in full_name.lower():
                continue

        dataset_folder = os.path.join(options.output_folder, full_name)
        scene_list_path = os.path.join(dataset_folder, 'scene_list.dat')
        done_flag_path = os.path.join(dataset_folder, 'done.flag')
        if not os.path.exists(dataset_folder):
            os.mkdir(dataset_folder)

        if os.path.exists(done_flag_path) and not options.refetch_scenes:
            print('Skipping completed dataset ' + full_name)
            continue

        print('--> Search scenes for: ' + full_name)

        BATCH_SIZE = 10000
        if not os.path.exists(scene_list_path) or options.refetch_scenes:
            # Request the scene list from USGS
            #details = {'Agency - Platform - Vendor':'WORLDVIEW', 'Sensor Type':'MS'}
            #details = {'sensor_type':'MS'}
            details = {}  # TODO: How do these work??

            # Large sets of results require multiple queries in order to get all of the data
            done = False
            error = False
            all_scenes = []  # Accumulate all scene data here
            while not done:
                print('Searching with start offset = ' + str(len(all_scenes)))
                results = api.search(dataset,
                                     CATALOG,
                                     where=details,
                                     max_results=BATCH_SIZE,
                                     starting_number=len(all_scenes),
                                     extended=False)

                if 'results' not in results['data']:
                    print('ERROR: Failed to get any results for dataset: ' +
                          full_name)
                    error = True
                    break
                if len(results['data']['results']) < BATCH_SIZE:
                    done = True
                all_scenes += results['data']['results']

            if error:
                continue

            results['data']['results'] = all_scenes

            # Cache the results to disk
            with open(scene_list_path, 'wb') as f:
                pickle.dump(results, f)

        else:  # Load the results from the cache file
            with open(scene_list_path, 'rb') as f:
                results = pickle.load(f)

        print('Got ' + str(len(results['data']['results'])) +
              ' scene results.')

        for scene in results['data']['results']:

            fail = False
            REQUIRED_PARTS = ['displayId', 'summary', 'entityId']
            for p in REQUIRED_PARTS:
                if (p not in scene) or (not scene[p]):
                    print('scene object is missing element: ' + p)
                    print(scene)
                    fail = True
            if fail:
                continue

            # If image list was provided skip other image names
            if images_to_use and (scene['displayId'] not in images_to_use):
                continue

            # Figure out the downloaded file path for this image
            file_name = scene['displayId'] + '.zip'
            output_path = os.path.join(dataset_folder, file_name)
            if not os.path.exists(dataset_folder):
                os.mkdir(dataset_folder)
            if os.path.exists(output_path):
                continue  # Already have the file!

            # Check if this is one of the sensors we are interested in.
            DESIRED_SENSORS = [('worldview', 'hp'),
                               ('worldview', 'msi')]  # TODO: Add more
            parts = scene['summary'].lower().split(',')
            platform = None
            sensor = None
            for part in parts:
                if 'platform:' in part:
                    platform = part.split(':')[1].strip()
                if 'sensor:' in part:
                    sensor = part.split(':')[1].strip()
            if (not platform) or (not sensor):
                raise Exception('Unknown sensor: ' + scene['summary'])
            if (platform, sensor) not in DESIRED_SENSORS:
                print((platform, sensor))
                print('Undesired sensor: ' + scene['summary'])
                continue

            # Investigate the number of bands
            PLATFORM_BAND_COUNTS = {'worldview': 8, 'TODO': 1}
            min_num_bands = PLATFORM_BAND_COUNTS[platform]
            num_bands = None
            try:
                meta = api.metadata(dataset, CATALOG, scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error fetching metadata for dataset = ' + dataset +
                      ', entity = ' + scene['entityId'])
                continue
            try:
                for m in meta['data'][0]['metadataFields']:
                    if m['fieldName'] == 'Number of bands':
                        num_bands = int(m['value'])
                        break
                if not num_bands:
                    raise KeyError()  # Treat like the except case
                if num_bands < min_num_bands:
                    print('Skipping %s, too few bands: %d' %
                          (scene['displayId'], num_bands))
                    continue
            except KeyError:
                print('Unable to perform metadata check!')
                print(meta)

            # Make sure we know which file option to download
            try:
                types = api.download_options(dataset, CATALOG,
                                             scene['entityId'])
            except json.decoder.JSONDecodeError:
                print('Error decoding download options!')
                continue

            if not types['data'] or not types['data'][0]:
                raise Exception('Need to handle types: ' + str(types))
            ready = False
            download_type = 'STANDARD'  # TODO: Does this ever change?
            for o in types['data'][0]['downloadOptions']:
                if o['available'] and o['downloadCode'] == download_type:
                    ready = True
                    break
            if not ready:
                raise Exception('Missing download option for scene: ' +
                                str(types))

            # Get the download URL of the file we want.
            r = api.download(dataset,
                             CATALOG, [scene['entityId']],
                             product=download_type)
            try:
                url = r['data'][0]['url']
            except Exception as e:
                raise Exception('Failed to get download URL from result: ' +
                                str(r)) from e

            print(scene['summary'])
            # Finally download the data!
            cmd = ('wget "%s" --user %s --password %s -O %s' %
                   (url, options.user, options.password, output_path))
            print(cmd)
            os.system(cmd)

            #raise Exception('DEBUG')

        print('Finished processing dataset: ' + full_name)
        os.system('touch ' + done_flag_path)  # Mark this dataset as finished
        #raise Exception('DEBUG')

        #if not os.path.exists(output_path):
        #    raise Exception('Failed to download file ' + output_path)

    print('Finished downloading HDDS files.')
    # Can just let this time out
    #api.logout()

    return 0
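
The snippet does not include the script's entry point; presumably main receives the command-line arguments along these lines (an assumption based on the argsIn parameter):

import sys

if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))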
Example #8
def download_options(dataset, scene_ids, node, api_key):
    
    node = get_node(dataset, node)
    
    data = api.download_options(dataset, node, scene_ids)
    print(json.dumps(data))
Example #9
def download_options(dataset, scene_ids, api_key):
    data = api.download_options(dataset, scene_ids)
    click.echo(json.dumps(data))
Example #10
def download_options(dataset, scene_ids, node, api_key):

    node = get_node(dataset, node)

    data = api.download_options(dataset, node, scene_ids)
    print(json.dumps(data))
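
Examples #8 through #10 are thin command-line wrappers; the decorators that register them are not shown. A hedged sketch of how the click-based variant in Example #9 might be wired up (the command and option names are assumptions):

import json

import click
from usgs import api


@click.command(name='download-options')
@click.argument('dataset')
@click.argument('scene_ids', nargs=-1)
@click.option('--api-key', 'api_key', default=None, help='USGS API key (assumed option name).')
def download_options(dataset, scene_ids, api_key):
    """Print the download options for the given scenes as JSON."""
    data = api.download_options(dataset, list(scene_ids))
    click.echo(json.dumps(data))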