Example 1
def download(url, cache=False):
    """
    Downloads URL using the Python requests module to the current directory.
    :param cache: if True then files will be downloaded to a cache directory.
    """
    try:
        if cache:
            # parsed_url = urlparse.urlparse(url)
            parsed_url = urlparse(url)
            filename = os.path.join(config.cache_path(), parsed_url.netloc,
                                    parsed_url.path.strip('/'))
            if os.path.exists(filename):
                LOGGER.info('file already in cache: %s',
                            os.path.basename(filename))
                if check_creationtime(filename, url):
                    LOGGER.info(
                        'file in cache older than archive file, downloading: %s ',
                        os.path.basename(filename))
                    os.remove(filename)
                    filename = download_file(url, out=filename)
            else:
                if not os.path.exists(os.path.dirname(filename)):
                    os.makedirs(os.path.dirname(filename))
                LOGGER.info('downloading: %s', url)
                filename = download_file(url, out=filename)
                # make softlink to current dir
                # os.symlink(filename, os.path.basename(filename))
                # filename = os.path.basename(filename)
        else:
            filename = download_file(url)
    except Exception:
        LOGGER.exception('failed to download data')
        raise
    return filename
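A minimal usage sketch for the helper above, assuming download() is importable from a module such as blackswan.datafetch (the module path is an assumption) and that config.cache_path() points to a writable directory:

from blackswan.datafetch import download  # hypothetical module path, adjust as needed

# Fetch one NCEP surface file into the cache; a repeated call reuses the cached
# copy unless check_creationtime() reports that the archive file is newer.
url = ('https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/'
       'ncep.reanalysis.dailyavgs/surface/slp.2012.nc')
local_nc = download(url, cache=True)
print(local_nc)  # path of the cached netCDF file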
Example 2
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP', timres='day', getlevel=True):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to local file system
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return list: list of path/files.nc
    """
    # imports used for netCDF conversion
    from netCDF4 import Dataset
    from os import path, system, remove
    from blackswan.ocgis_module import call
    from shutil import move

    try:

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except Exception:
        msg = "reanalyses failed to set start and end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)
    cur_year = dt.now().year
    cur_month = dt.now().month
    cur_day = dt.now().day
    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' % (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (variable, year)  # noqa
                    if variable == 'pr_wtr':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/pr_wtr.eatm.%s.nc' % (year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/pressure/hgt.%s.nc' % (year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/pressure/hgt.%s.nc' % (year)  # noqa
                else:
                    raise Exception('Dataset %s not known' % dataset)
                LOGGER.debug('url: %s' % url)
            except Exception:
                msg = "could not set url for %s %s" % (dataset, year)
                LOGGER.exception(msg)
                raise
            try:
                # force updating of the current year dataset
                if year == cur_year:
                    # import urlparse
                    from blackswan import config
                    # parsed_url = urlparse.urlparse(url)
                    parsed_url = urlparse(url)
                    cur_filename = path.join(config.cache_path(), parsed_url.netloc, parsed_url.path.strip('/'))
                    if path.exists(cur_filename):
                        fn_time = dt.fromtimestamp(path.getmtime(cur_filename))
                        LOGGER.debug('Rean data for %s year creation time: %s' % (year, fn_time))
                        if (fn_time.year == cur_year) and (fn_time.month == cur_month) and (fn_time.day == cur_day):
                            LOGGER.debug('Rean data for %s year is up-to-date' % year)
                        else:
                            LOGGER.debug('Rean data for %s year forced to update' % year)
                            remove(cur_filename)
                # ###########################################
                df = download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    ds = Dataset(df)
                    df_time = ds.variables['time']
                    # TODO: check not only the calendar attribute but whether the file is already NETCDF4_CLASSIC
                    if not hasattr(df_time, 'calendar'):
                        p, f = path.split(path.abspath(df))
                        LOGGER.debug("path = %s , file %s " % (p, f))
                        # May be an issue if several users are working at the same time
                        move(df, f)
                        conv = call(resource=f,
                                    output_format_options={'data_model': 'NETCDF4_CLASSIC'},
                                    dir_output=p,
                                    prefix=f.replace('.nc', ''))
                        obs_data.append(conv)
                        LOGGER.debug('file %s converted to NETCDF4_CLASSIC' % conv)
                        # Cleaning, could be 50gb... for each (!) user
                        # TODO Check how links work
                        cmdrm = 'rm -f %s' % (f)
                        system(cmdrm)
                    else:
                        obs_data.append(df)
                    ds.close()
                except Exception:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except Exception:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Fetched %s reanalysis files' % len(obs_data))
    except Exception:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if (level is None) or (not getlevel):
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
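A short call sketch for reanalyses(), assuming it is in scope (the import path is not shown in the source) and that the blackswan cache path is configured:

# Daily NCEP sea-level pressure for a small range of years.
slp_files = reanalyses(start=2000, end=2002, variable='slp',
                       dataset='NCEP', timres='day')
print(len(slp_files), 'files fetched')

# Geopotential height at 700 hPa: the level is encoded in the variable name (z700)
# and extracted afterwards via get_level(), since getlevel defaults to True.
z700_files = reanalyses(start=2000, end=2000, variable='z700', dataset='NCEP')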
Example 3
def fetch_eodata(item_type,
                 asset,
                 token,
                 bbox,
                 period=[dt.today() - timedelta(days=30),
                         dt.today()],
                 cloud_cover=0.5,
                 cache=True):
    """
    search for given EO data product provided by planet.
    The search and appropriate download is limited by bbox and search period

    :param item_type: product provided by planet
    :param asset: product asset, (visible, analytic, bands)
    :param token: Authentification token generated by planet Earth Obersavation Explorer
    :param bbox: latitude longitude coordinates defining a bounding box
    :param period: [start , end] datetime objects (default last 30 days)
    :param cloud_cover: threshold for cloud_cover tolerance. 0 = 0percent cloud_cover 1=100percent cloud_cover
    :param cache: if True file (default) is stored in local cache

    return list: list of pathes for fetched products
    """

    import os
    import json
    import requests
    from requests.auth import HTTPBasicAuth
    from tempfile import mkstemp
    import shutil
    import time
    from os.path import join
    from os import path, makedirs
    from blackswan.config import cache_path
    #  bbox order: min_lon, min_lat, max_lon, max_lat
    #              (xmin, ymin, xmax, ymax)
    geojson_geometry = {
        "type":
        "Polygon",
        "coordinates": [[
            [bbox[0], bbox[1]],  # [14.600830078125, 8.677421123289992],
            [bbox[2], bbox[1]],  # [14.797210693359375, 8.677421123289992],
            [bbox[2], bbox[3]],  # [14.797210693359375, 8.90678000752024],
            [bbox[0], bbox[3]],  # [14.600830078125, 8.90678000752024],
            [bbox[0], bbox[1]],  # [14.600830078125, 8.677421123289992]
        ]]
    }

    LOGGER.debug("geojson_geometry: %s" % geojson_geometry)
    # get images that overlap with our AOI
    geometry_filter = {
        "type": "GeometryFilter",
        "field_name": "geometry",
        "config": geojson_geometry
    }

    start = period[0]
    end = period[1]

    LOGGER.debug("Period %s to %s " % (start, end))

    # get images acquired within a date range
    date_range_filter = {
        "type": "DateRangeFilter",
        "field_name": "acquired",
        "config": {
            "gte": "%s000Z" % (start.strftime('%Y-%m-%dT%H:%M:%S.')),
            "lte": "%s000Z" % (end.strftime('%Y-%m-%dT%H:%M:%S.')),
        }
    }

    # only get images which have <50% cloud coverage
    cloud_cover_filter = {
        "type": "RangeFilter",
        "field_name": "cloud_cover",
        "config": {
            "lte": cloud_cover
        }
    }

    # combine our geo, date, cloud filters
    combined_filter = {
        "type": "AndFilter",
        "config": [geometry_filter, date_range_filter, cloud_cover_filter]
    }

    # API Key
    PLANET_API_KEY = token  # os.getenv('PL_API_KEY')

    # example item_type: "PSScene4Band"
    # API request object

    search_request = {
        "interval": "day",
        "item_types": [item_type],
        "filter": combined_filter
    }

    if cache:
        DIR_archiv = cache_path()
    else:
        DIR_archiv = '.'
    DIR = join(DIR_archiv, "EO_data", item_type, asset)

    if not os.path.exists(DIR):
        makedirs(DIR)

    # fire off the POST request
    search_result = requests.post(
        'https://api.planet.com/data/v1/quick-search',
        auth=HTTPBasicAuth(PLANET_API_KEY, ''),
        json=search_request)

    # LOGGER.info('Search result: %s ' % json.dumps(search_result.json(), indent=1))

    # extract image IDs only
    image_ids = [feature['id'] for feature in search_result.json()['features']]
    LOGGER.info("image IDs:  %s " % image_ids)

    resources = []
    resources_sleeping = []

    for image_id in image_ids:

        id0 = image_id
        if "xml" in asset:
            filename = "%s.xml" % id0
        else:
            filename = "%s.tif" % id0

        local_file = join(DIR, filename)

        if os.path.exists(local_file):
            LOGGER.info('File %s in cache' % filename)
            resources.extend([local_file])
        else:
            id0_url = 'https://api.planet.com/data/v1/item-types/{}/items/{}/assets'.format(
                item_type, id0)

            # Returns JSON metadata for assets in this ID. Learn more: planet.com/docs/reference/data-api/items-assets/#asset
            result = requests.get(id0_url,
                                  auth=HTTPBasicAuth(PLANET_API_KEY, ''))
            # List of asset types available for this particular satellite image
            keys = result.json().keys()
            LOGGER.debug("assets in file %s : %s " % (filename, keys))
            # This is "inactive" if the "visual" asset has not yet been activated; otherwise 'active'
            #  if 'analytic' in result.json().keys():
            if asset in keys:
                LOGGER.debug("downloading file %s" % filename)
                # LOGGER.debug(result.json()[asset]['status'])
                # Parse out useful links
                links = result.json()[asset]["_links"]  # u"analytic"
                self_link = links["_self"]
                activation_link = links["activate"]
                # Request activation of the 'visual' asset:
                activate_result = requests.get(activation_link,
                                               auth=HTTPBasicAuth(
                                                   PLANET_API_KEY, ''))
                activation_status_result = requests.get(
                    self_link, auth=HTTPBasicAuth(PLANET_API_KEY, ''))

                try:
                    timeout = time.time() + 30  # 30 seconds from now
                    while activation_status_result.json()["status"] != 'active':
                        if time.time() > timeout and activation_status_result.json()["status"] == 'inactive':
                            LOGGER.debug("File %s is still inactive after 30sec. Giving up" % filename)
                            resources_sleeping.extend([filename])
                            break
                        else:
                            LOGGER.debug('File %s is sleeping. gently waking up' % filename)
                            LOGGER.debug(activation_status_result.json()["status"])
                            time.sleep(30)
                            activation_status_result = requests.get(
                                self_link, auth=HTTPBasicAuth(PLANET_API_KEY, ''))

                    if time.time() < timeout or activation_status_result.json()["status"] == 'active':
                        LOGGER.debug('File ready to download: %s' % activation_status_result.json()["status"])
                        # Image can be downloaded by making a GET with your Planet API key, from here:
                        download_link = activation_status_result.json()["location"]
                        r = requests.get(download_link, stream=True, verify=False)
                        with open(local_file, 'wb') as fp:
                            shutil.copyfileobj(r.raw, fp)
                            resources.extend([local_file])
                except Exception:
                    LOGGER.exception("failed to download file %s " % filename)
            else:
                LOGGER.debug('Asset not available for %s, most likely missing permissions for this data set' % filename)

    return resources_sleeping, resources
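A usage sketch for fetch_eodata() with a placeholder API token; the item type "PSScene4Band" and the asset name "analytic" are taken from the comments in the code above, and the bbox follows the [min_lon, min_lat, max_lon, max_lat] order expected by the function:

from datetime import datetime as dt, timedelta

token = 'YOUR_PLANET_API_KEY'  # placeholder
bbox = [14.60, 8.67, 14.80, 8.91]  # example coordinates from the geometry comments
sleeping, fetched = fetch_eodata('PSScene4Band', 'analytic', token, bbox,
                                 period=[dt.today() - timedelta(days=30), dt.today()],
                                 cloud_cover=0.5)
print('still activating:', sleeping)
print('downloaded:', fetched)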