Example #1
def bin_request(fetchfcn,
                hash_key,
                dx=2,
                dy=2,
                dt=timedelta(days=1),
                **kwargs):
    """ check fetch query hash history and generate fetch requests

        requests are batched into dx° * dy° * dt request bins,
        with the entire range of depths included in each bin.
        coordinates are rounded to nearest outer-boundary degree integer,
        a query hash is stored if a fetch request is successful

        args:
            fetchfcn:
                the fetch function to call for each bin (callable)
            hash_key:
                seed string used to check and store query hashes (string)
            dx:
                delta longitude bin size (int)
            dy:
                delta latitude bin size (int)
            dt:
                delta time bin size (timedelta)
            kwargs:
                boundary keyword arguments passed to fetchfcn:
                north, south, east, west, start, end
                (and optionally top, bottom)

        return: nothing
    """
    # break request into gridded dx*dy*dt chunks for querying.
    # bound=-1 rounds down to the nearest bin edge; bound=+1 rounds up,
    # so the bins always cover the entire requested region
    lower, upper = -1, +1
    xlimit = lambda x, bound: int(x - (x % (dx * -bound)))
    ylimit = lambda y, bound: int(y - (y % (dy * -bound)))
    kwargs['west'] = max(-180, xlimit(kwargs['west'], lower))
    kwargs['east'] = min(+180, xlimit(kwargs['east'], upper))
    kwargs['south'] = max(-90, ylimit(kwargs['south'], lower))
    kwargs['north'] = min(+90, ylimit(kwargs['north'], upper))

    # fetch data chunks
    t = datetime(kwargs['start'].year, kwargs['start'].month,
                 kwargs['start'].day)
    while t < kwargs['end']:
        for x in range(kwargs['west'], kwargs['east'], dx):
            for y in range(kwargs['south'], kwargs['north'], dy):

                qry = dict(
                    zip(('west', 'east', 'south', 'north', 'start', 'end'),
                        (x, x + dx, y, y + dy, t, t + dt)))

                if 'top' in kwargs.keys():  # get entire depth column
                    qry['top'] = 0
                    qry['bottom'] = 5000

                if not serialized(qry, hash_key):
                    fetchfcn(**qry.copy())
                else:
                    logging.debug(
                        f'FETCH_HANDLER DEBUG MSG: '
                        f'already fetched {t.date().isoformat()} '
                        f'{fmt_coords(qry)} {hash_key}! continuing...')
        t += dt

    return
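A minimal usage sketch of bin_request (mock_fetch is a hypothetical stand-in for a real fetch function such as fetch_era5, and the call assumes kadlu's serialized() hash index is in scope):

from datetime import datetime, timedelta

def mock_fetch(**qry):
    # hypothetical stand-in for a kadlu fetch function, e.g. fetch_era5
    print(qry['west'], qry['east'], qry['start'].date())

# the boundaries round outward to -66..-62 and 44..48, so this request
# is split into 2 * 2 * 3 = 12 bins, one mock_fetch call per bin
bin_request(mock_fetch,
            hash_key='fetch_mock_demo',
            west=-64.2, east=-62.1, south=44.1, north=46.9,
            start=datetime(2015, 1, 1), end=datetime(2015, 1, 4))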
Example #2
    def fetch_bathymetry(self, **kwargs):
        # trim query indexing entropy and check for fetched data
        for k in ('start', 'end', 'lock', 'top', 'bottom'):
            kwargs.pop(k, None)
        if serialized(kwargs, 'fetch_chs_bathy'): return False

        # if new data was fetched, index the query hash
        if (fetch_chs(south=kwargs['south'],
                      north=kwargs['north'],
                      west=kwargs['west'],
                      east=kwargs['east'],
                      band_id=1)):
            insert_hash(kwargs, 'fetch_chs_bathy')
        return True
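serialized() and insert_hash() are kadlu's query-hash index: together they memoize completed fetch requests so that repeated queries become no-ops. A rough in-memory sketch of the idea (kadlu's real implementation persists the hashes to its database):

import hashlib
import json

_hash_index = set()  # stand-in for the persistent hash table

def _query_hash(kwargs, seed):
    # canonicalize the query dict so equivalent queries hash identically
    payload = json.dumps({k: str(v) for k, v in sorted(kwargs.items())})
    return hashlib.md5((seed + payload).encode()).hexdigest()

def serialized(kwargs=None, seed=''):
    return _query_hash(kwargs or {}, seed) in _hash_index

def insert_hash(kwargs=None, seed=''):
    _hash_index.add(_query_hash(kwargs or {}, seed))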
Example #3
def fetch_era5(var, kwargs):
    """ fetch global era5 data for specified variable and time range

        args:
            var: string
                the variable short name of desired wave parameter 
                according to ERA5 docs.  the complete list can be found 
                here (table 7 for wave params):
                https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Temporalfrequency
            kwargs: dict
                keyword arguments passed from the Era5() class as a dictionary

        return:
            True if new data was fetched, else False 
    """
    # cleaner stack trace by raising outside of try/except
    err = False
    try:
        c = cdsapi.Client(url=cfg['cdsapi']['url'], key=cfg['cdsapi']['key'])
    except KeyError:
        try:
            c = cdsapi.Client()
        except Exception:
            err = True

    if err:
        raise KeyError('CDS API has not been configured for the ERA5 module. '
                       'obtain an API token from the following URL and run '
                       'kadlu.era5_cfg(url="URL_HERE", key="TOKEN_HERE"). '
                       'https://cds.climate.copernicus.eu/api-how-to')

    assert all(kw in kwargs for kw in
               ('south', 'north', 'west', 'east', 'start', 'end')), \
            'malformed query'
    t = datetime(kwargs['start'].year, kwargs['start'].month,
                 kwargs['start'].day, kwargs['start'].hour)
    assert kwargs['end'] - kwargs['start'] <= timedelta(days=1, hours=1), \
            'use fetch_handler for this instead'

    # check if data has been fetched already
    if serialized(kwargs, f'fetch_era5_{era5_varmap[var]}'): return False

    # fetch the data
    fname = f'ERA5_reanalysis_{var}_{t.strftime("%Y-%m-%d")}.grb2'
    fpath = f'{storage_cfg()}{fname}'
    if not isfile(fpath):
        with dev_null():
            c.retrieve(
                'reanalysis-era5-single-levels', {
                    'product_type': 'reanalysis',
                    'format': 'grib',
                    'variable': var,
                    'year': t.strftime("%Y"),
                    'month': t.strftime("%m"),
                    'day': t.strftime("%d"),
                    'time': [
                        datetime(t.year, t.month, t.day, h).strftime('%H:00')
                        for h in range(24)
                    ]
                }, fpath)

    # load the data file and insert it into the database
    assert isfile(fpath)
    grb = pygrib.open(fpath)
    agg = np.array([[], [], [], [], []])
    table = var[4:] if var[0:4] == '10m_' else var

    # iterate over every message in the grib file (zipping against
    # range(1, grb.messages) would silently drop the final message)
    for msg in grb:
        if msg.validDate < kwargs['start'] or msg.validDate > kwargs['end']:
            continue

        # read grib data
        z, y, x = msg.data()
        if np.ma.is_masked(z):
            z2 = z[~z.mask].data
            y2 = y[~z.mask]
            x2 = x[~z.mask]
        else:  # wind data has no mask
            z2 = z.reshape(-1)
            y2 = y.reshape(-1)
            x2 = x.reshape(-1)

        # adjust latitude-zero to 180th meridian
        x3 = ((x2 + 180) % 360) - 180

        # index coordinates, select query range subset, aggregate results
        xix = np.logical_and(x3 >= kwargs['west'], x3 <= kwargs['east'])
        yix = np.logical_and(y2 >= kwargs['south'], y2 <= kwargs['north'])
        idx = np.logical_and(xix, yix)
        agg = np.hstack((agg, [
            z2[idx], y2[idx], x3[idx],
            dt_2_epoch([msg.validDate for i in z2[idx]]),
            ['era5' for i in z2[idx]]
        ]))

    # perform the insertion
    if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
    n1 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.executemany(
        f"INSERT OR IGNORE INTO {table} "
        f"VALUES (?,?,?,CAST(? AS INT),?)", agg.T)
    n2 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.execute("COMMIT")
    conn.commit()
    insert_hash(kwargs, f'fetch_era5_{era5_varmap[var]}')
    if 'lock' in kwargs.keys(): kwargs['lock'].release()

    logging.info(
        f"ERA5 {msg.validDate.date().isoformat()} {var}: "
        f"processed and inserted {n2-n1} rows in region {fmt_coords(kwargs)}. "
        f"{len(agg[0]) - (n2-n1)} duplicates ignored")

    return True
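ERA5 grib files report longitude in the [0, 360) convention while the query bounds use [-180, 180); the ((x2 + 180) % 360) - 180 remap above converts between the two. A standalone check of that identity:

import numpy as np

lon_era5 = np.array([0., 90., 179.75, 180., 270., 359.75])
lon_signed = ((lon_era5 + 180) % 360) - 180
print(lon_signed)  # -> 0., 90., 179.75, -180., -90., -0.25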
Example #4
def fetch_wwiii(var, kwargs):
    """ download wwiii data and return associated filepaths

        args:
            var: string
                the variable name of the desired parameter according to WWIII
                docs. the complete list of variables can be found at the
                following URL under 'model output':
                https://polar.ncep.noaa.gov/waves/implementations.php
            kwargs: dict
                keyword arguments containing the query boundaries:
                south, north: float
                    ymin, ymax coordinate boundaries (latitude). range: -90, 90
                west, east: float
                    xmin, xmax coordinate boundaries (longitude). range: -180, 180
                start: datetime
                    the start of the desired time range
                end: datetime
                    the end of the desired time range

        return:
            True if new data was fetched, else False
    """
    assert all(kw in kwargs for kw in
               ('south', 'north', 'west', 'east', 'start', 'end')), \
            'malformed query'
    t = datetime(kwargs['start'].year, kwargs['start'].month, 1)
    assert kwargs['end'] - kwargs['start'] <= timedelta(days=1), \
            'use fetch_handler for this'

    if serialized(kwargs, f'fetch_wwiii_{wwiii_varmap[var]}'): return False
    #print("WWIII NOTICE: resolution selection not implemented yet. defaulting to 0.5°")
    regions = ['glo_30m']

    assert regions == ['glo_30m'], 'invalid region string'
    reg = regions[0]
    fname = f"multi_1.{reg}.{var}.{t.strftime('%Y%m')}.grb2"
    fetchfile = f"{storage_cfg()}{fname}"

    # if the file hasn't been downloaded yet, fetch it
    if not isfile(fetchfile):
        if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
        logging.info(f'WWIII {kwargs["start"].date().isoformat()} {var}: '
                     f'downloading {fname} from NOAA WaveWatch III...')
        if reg == 'glo_30m' and 'wind' not in var and t.year >= 2018:
            fetchurl = f"{wwiii_src}{t.strftime('%Y/%m')}/gribs/{fname}"
        else:
            fetchurl = f"{wwiii_src}{t.strftime('%Y/%m')}/{reg}/{fname}"
        with requests.get(fetchurl, stream=True) as payload:
            assert payload.status_code == 200, 'couldn\'t retrieve file'
            with open(fetchfile, 'wb') as f:
                shutil.copyfileobj(payload.raw, f)
        if 'lock' in kwargs.keys(): kwargs['lock'].release()

    # function to insert the parsed data to local database
    def insert(table, agg, null, kwargs):
        if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
        n1 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
        db.executemany(
            f"INSERT OR IGNORE INTO {table} VALUES (?,?,?,CAST(? AS INT),?)",
            agg.T)
        n2 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
        db.execute("COMMIT")
        conn.commit()
        insert_hash(kwargs, f'fetch_wwiii_{wwiii_varmap[var]}')
        if 'lock' in kwargs.keys(): kwargs['lock'].release()
        logging.info(
            f"WWIII {kwargs['start'].date().isoformat()} {table}: "
            f"processed and inserted {n2-n1} rows for region {fmt_coords(kwargs)}. "
            f"{null} null values removed, "
            f"{len(agg[0]) - (n2-n1)} duplicates ignored")

    # open the file, parse data, insert values
    grib = pygrib.open(fetchfile)
    assert grib.messages > 0, f'problem opening {fetchfile}'
    null = 0
    agg = np.array([[], [], [], [], []])
    grbvar = grib[1]['name']
    table = f'{var}{grbvar[0]}' if var == 'wind' else var
    # iterate over every message in the grib file (zipping against
    # range(1, grib.messages) would silently drop the final message)
    for msg in grib:
        if msg['name'] != grbvar:
            insert(table, agg, null, kwargs)
            table = f'{var}{msg["name"][0]}' if var == 'wind' else var
            agg = np.array([[], [], [], [], []])
            grbvar = msg['name']
            null = 0
        if msg.validDate < kwargs['start']: continue
        if msg.validDate > kwargs['end']: continue
        z, y, x = msg.data()
        src = np.array(['wwiii' for each in z[~z.mask].data])
        grid = np.vstack(
            (z[~z.mask].data, y[~z.mask], ((x[~z.mask] + 180) % 360) - 180,
             dt_2_epoch([msg.validDate
                         for each in z[~z.mask].data]), src)).astype(object)
        agg = np.hstack((agg, grid))
        null += sum(sum(z.mask))
    insert(table, agg, null, kwargs)

    return True
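Both the ERA5 and WWIII insertions rely on SQLite's INSERT OR IGNORE together with a uniqueness constraint to drop duplicate rows from overlapping bins, logging the row-count delta taken around the insert. A self-contained illustration of the pattern (the windU schema below is an assumption for the demo, not kadlu's actual table definition):

import sqlite3

conn = sqlite3.connect(':memory:')
db = conn.cursor()
# assumed schema: value, lat, lon, epoch time, source, with a composite key
db.execute('CREATE TABLE windU (val REAL, lat REAL, lon REAL, time INT, '
           'source TEXT, PRIMARY KEY (lat, lon, time, source))')

rows = [(4.2, 44.5, -63.5, 1420070400, 'wwiii'),
        (4.2, 44.5, -63.5, 1420070400, 'wwiii'),  # duplicate: silently ignored
        (3.9, 45.0, -63.0, 1420070400, 'wwiii')]
n1 = db.execute('SELECT COUNT(*) FROM windU').fetchall()[0][0]
db.executemany('INSERT OR IGNORE INTO windU VALUES (?,?,?,CAST(? AS INT),?)', rows)
n2 = db.execute('SELECT COUNT(*) FROM windU').fetchall()[0][0]
conn.commit()
print(f'{n2 - n1} rows inserted, {len(rows) - (n2 - n1)} duplicates ignored')
# -> 2 rows inserted, 1 duplicates ignored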
Example #5
File: hycom.py Project: matt24smith/kadlu
def fetch_idx(self, var, kwargs):
    """ convert user query to grid index slices, handle edge cases """
    def _idx(self, var, year, kwargs):
        """ build indices for query and call fetch_hycom """
        haystack = np.array(
            [self.epoch[year], self.depth, self.ygrid, self.xgrid])
        needles1 = np.array([
            dt_2_epoch(kwargs['start']), kwargs['top'], kwargs['south'],
            kwargs['west']
        ])
        needles2 = np.array([
            dt_2_epoch(kwargs['end']), kwargs['bottom'], kwargs['north'],
            kwargs['east']
        ])
        slices = list(
            zip(map(index, needles1, haystack),
                map(index, needles2, haystack)))

        n = reduce(np.multiply, map(lambda s: s[1] - s[0] + 1, slices))
        assert n > 0, f"{n} records available within query boundaries: {kwargs}"

        logging.info(
            f"HYCOM {kwargs['start'].date().isoformat()} "
            f"downloading {n} {var} values in region {fmt_coords(kwargs)}...")
        fetch_hycom(self=self,
                    slices=slices,
                    var=var,
                    year=year,
                    kwargs=kwargs)
        return

    assert kwargs['start'] <= kwargs['end']
    assert kwargs['south'] <= kwargs['north']
    assert kwargs['top'] <= kwargs['bottom']
    assert kwargs['start'] > datetime(1994, 1, 1), \
            'data not available in this range'
    assert kwargs['end'] < datetime(2016, 1, 1), \
            'data not available in this range'
    assert kwargs['end'] - kwargs['start'] <= timedelta(days=1), \
            "use fetch handler for this"

    # query local database for existing checksums
    if serialized(kwargs, f'fetch_hycom_{hycom_varmap[var]}'): return False
    if not serialized(seed='fetch_hycom_grid'):
        fetch_grid()
        insert_hash(seed='fetch_hycom_grid')

    if not self.grids:
        self.ygrid, self.xgrid = load_grid()
        self.epoch = load_times()
        self.depth = load_depth()
        self.grids = [self.ygrid, self.xgrid, self.epoch, self.depth]

    # if the query spans the antimeridian, make two separate fetch requests
    year = str(kwargs['start'].year)
    if kwargs['west'] > kwargs['east']:
        logging.debug('splitting request')
        kwargs1, kwargs2 = kwargs.copy(), kwargs.copy()
        kwargs1['east'] = self.xgrid[-1]
        kwargs2['west'] = self.xgrid[0]
        if not serialized(kwargs1, f'fetch_hycom_{hycom_varmap[var]}'):
            _idx(self, var, year, kwargs1)
        if not serialized(kwargs2, f'fetch_hycom_{hycom_varmap[var]}'):
            _idx(self, var, year, kwargs2)
    else:
        _idx(self, var, year, kwargs)

    return True
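The index() helper used by _idx maps a coordinate value to its position on a sorted grid axis. A plausible nearest-neighbour stand-in built on np.searchsorted (the 0.08-degree axis below is only illustrative; kadlu loads the real HYCOM grid via load_grid()):

import numpy as np

def index(needle, haystack):
    # hypothetical stand-in for kadlu's index(): nearest grid position
    i = int(np.clip(np.searchsorted(haystack, needle), 0, len(haystack) - 1))
    if i > 0 and abs(haystack[i - 1] - needle) < abs(haystack[i] - needle):
        i -= 1
    return i

xgrid = np.arange(-180, 180, 0.08)  # illustrative axis, not the real grid
west, east = -63.66, -62.94
print(index(west, xgrid), index(east, xgrid))  # -> 1454 1463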