Example #1
    def fetch_bathymetry(self, **kwargs):
        # drop keys that should not affect the query hash, then check
        # whether this query has already been fetched
        for k in ('start', 'lock', 'end', 'top', 'bottom'):
            if k in kwargs.keys(): del kwargs[k]
        if serialized(kwargs, 'fetch_chs_bathy'): return False

        # if new data was fetched, index the query hash
        if (fetch_chs(south=kwargs['south'],
                      north=kwargs['north'],
                      west=kwargs['west'],
                      east=kwargs['east'],
                      band_id=1)):
            insert_hash(kwargs, 'fetch_chs_bathy')
        return True
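A minimal usage sketch for Example #1, assuming a data-source class (here called Chs) that exposes fetch_bathymetry; the class name and the bounding-box values are hypothetical, but the south/north/west/east keys mirror the arguments forwarded to fetch_chs above:

# hypothetical call site; only the bounding-box keys come from the code above
chs = Chs()
fetched = chs.fetch_bathymetry(south=44.0, north=46.0, west=-64.0, east=-62.0)
print('new data fetched' if fetched else 'query already serialized')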
Example #2
def insert(table, agg, null, kwargs):
    # note: names not defined in this snippet (db, conn, var, wwiii_varmap,
    # insert_hash, fmt_coords) come from the surrounding scope of the
    # original wwiii module
    if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
    n1 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.executemany(
        f"INSERT OR IGNORE INTO {table} VALUES (?,?,?,CAST(? AS INT),?)",
        agg.T)
    n2 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.execute("COMMIT")
    conn.commit()
    insert_hash(kwargs, f'fetch_wwiii_{wwiii_varmap[var]}')
    if 'lock' in kwargs.keys(): kwargs['lock'].release()
    logging.info(
        f"WWIII {kwargs['start'].date().isoformat()} {table}: "
        f"processed and inserted {n2-n1} rows for region {fmt_coords(kwargs)}. "
        f"{null} null values removed, "
        f"{len(agg[0]) - (n2-n1)} duplicates ignored")
Example #3
def fetch_era5(var, kwargs):
    """ fetch global era5 data for specified variable and time range

        args:
            var: string
                the short name of the desired wave parameter according
                to the ERA5 docs. the complete list can be found here
                (table 7 for wave params):
                https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Temporalfrequency
            kwargs: dict
                keyword arguments passed from the Era5() class as a dictionary

        return:
            True if new data was fetched, else False 
    """
    # cleaner stack trace by raising outside of try/except
    err = False
    try:
        c = cdsapi.Client(url=cfg['cdsapi']['url'], key=cfg['cdsapi']['key'])
    except KeyError:
        try:
            c = cdsapi.Client()
        except Exception:
            err = True

    if err:
        raise KeyError('CDS API has not been configured for the ERA5 module. '
                       'obtain an API token from the following URL and run '
                       'kadlu.era5_cfg(url="URL_HERE", key="TOKEN_HERE"). '
                       'https://cds.climate.copernicus.eu/api-how-to')

    assert 6 == sum(kw in kwargs.keys() for kw in
                    ['south', 'north', 'west', 'east', 'start', 'end']), \
        'malformed query'
    t = datetime(kwargs['start'].year, kwargs['start'].month,
                 kwargs['start'].day, kwargs['start'].hour)
    assert kwargs['end'] - kwargs['start'] <= timedelta(days=1, hours=1), \
            'use fetch_handler for this instead'

    # check if data has been fetched already
    if serialized(kwargs, f'fetch_era5_{era5_varmap[var]}'): return False

    # fetch the data
    fname = f'ERA5_reanalysis_{var}_{t.strftime("%Y-%m-%d")}.grb2'
    fpath = f'{storage_cfg()}{fname}'
    if not isfile(fpath):
        with dev_null():
            c.retrieve(
                'reanalysis-era5-single-levels', {
                    'product_type': 'reanalysis',
                    'format': 'grib',
                    'variable': var,
                    'year': t.strftime("%Y"),
                    'month': t.strftime("%m"),
                    'day': t.strftime("%d"),
                    'time': [
                        datetime(t.year, t.month, t.day, h).strftime('%H:00')
                        for h in range(24)
                    ]
                }, fpath)

    # load the data file and insert it into the database
    assert isfile(fpath)
    grb = pygrib.open(fpath)
    agg = np.array([[], [], [], [], []])
    table = var[4:] if var[0:4] == '10m_' else var

    for msg, num in zip(grb, range(1, grb.messages)):
        if msg.validDate < kwargs['start'] or msg.validDate > kwargs['end']:
            continue

        # read grib data
        z, y, x = msg.data()
        if np.ma.is_masked(z):
            z2 = z[~z.mask].data
            y2 = y[~z.mask]
            x2 = x[~z.mask]
        else:  # wind data has no mask
            z2 = z.reshape(-1)
            y2 = y.reshape(-1)
            x2 = x.reshape(-1)

        # adjust latitude-zero to 180th meridian
        x3 = ((x2 + 180) % 360) - 180

        # index coordinates, select query range subset, aggregate results
        xix = np.logical_and(x3 >= kwargs['west'], x3 <= kwargs['east'])
        yix = np.logical_and(y2 >= kwargs['south'], y2 <= kwargs['north'])
        idx = np.logical_and(xix, yix)
        agg = np.hstack((agg, [
            z2[idx], y2[idx], x3[idx],
            dt_2_epoch([msg.validDate for i in z2[idx]]),
            ['era5' for i in z2[idx]]
        ]))

    # perform the insertion
    if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
    n1 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.executemany(
        f"INSERT OR IGNORE INTO {table} "
        f"VALUES (?,?,?,CAST(? AS INT),?)", agg.T)
    n2 = db.execute(f"SELECT COUNT(*) FROM {table}").fetchall()[0][0]
    db.execute("COMMIT")
    conn.commit()
    insert_hash(kwargs, f'fetch_era5_{era5_varmap[var]}')
    if 'lock' in kwargs.keys(): kwargs['lock'].release()

    logging.info(
        f"ERA5 {msg.validDate.date().isoformat()} {var}: "
        f"processed and inserted {n2-n1} rows in region {fmt_coords(kwargs)}. "
        f"{len(agg[0])- (n2-n1)} duplicates ignored")

    return True
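A hedged sketch of a query that satisfies the checks in fetch_era5: the six bounding keys required by the assert, plus start/end datetimes no more than one day and one hour apart. The variable name follows the CDS short-name convention but should be verified against the ERA5 documentation linked in the docstring:

from datetime import datetime

# hypothetical one-day query for significant wave height
kwargs = dict(
    south=44.0, north=46.0, west=-64.0, east=-62.0,
    start=datetime(2016, 3, 9),
    end=datetime(2016, 3, 9, 23),
)
fetch_era5('significant_height_of_combined_wind_waves_and_swell', kwargs)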
Example #4
File: hycom.py  Project: matt24smith/kadlu
def fetch_idx(self, var, kwargs):
    """ convert user query to grid index slices, handle edge cases """
    def _idx(self, var, year, kwargs):
        """ build indices for query and call fetch_hycom """
        haystack = np.array(
            [self.epoch[year], self.depth, self.ygrid, self.xgrid])
        needles1 = np.array([
            dt_2_epoch(kwargs['start']), kwargs['top'], kwargs['south'],
            kwargs['west']
        ])
        needles2 = np.array([
            dt_2_epoch(kwargs['end']), kwargs['bottom'], kwargs['north'],
            kwargs['east']
        ])
        slices = list(
            zip(map(index, needles1, haystack),
                map(index, needles2, haystack)))

        n = reduce(np.multiply, map(lambda s: s[1] - s[0] + 1, slices))
        assert n > 0, f"{n} records available within query boundaries: {kwargs}"

        logging.info(
            f"HYCOM {kwargs['start'].date().isoformat()} "
            f"downloading {n} {var} values in region {fmt_coords(kwargs)}...")
        fetch_hycom(self=self,
                    slices=slices,
                    var=var,
                    year=year,
                    kwargs=kwargs)
        return

    assert kwargs['start'] <= kwargs['end']
    assert kwargs['start'] > datetime(1994, 1, 1), \
        'data not available in this range'
    assert kwargs['end'] < datetime(2016, 1, 1), \
        'data not available in this range'
    assert kwargs['south'] <= kwargs['north']
    assert kwargs['top'] <= kwargs['bottom']
    assert kwargs['start'] >= datetime(1994, 1, 1)
    assert kwargs['end'] < datetime(2016, 1, 1)
    assert kwargs['end'] - kwargs['start'] <= timedelta(days=1), \
            "use fetch handler for this"

    # query local database for existing checksums
    if serialized(kwargs, f'fetch_hycom_{hycom_varmap[var]}'): return False
    if not serialized(seed='fetch_hycom_grid'):
        fetch_grid()
        insert_hash(seed='fetch_hycom_grid')

    if not self.grids:
        self.ygrid, self.xgrid = load_grid()
        self.epoch = load_times()
        self.depth = load_depth()
        self.grids = [self.ygrid, self.xgrid, self.epoch, self.depth]

    # if query spans antimeridian, make two separate fetch requests
    year = str(kwargs['start'].year)
    if kwargs['west'] > kwargs['east']:
        logging.debug('splitting request')
        kwargs1, kwargs2 = kwargs.copy(), kwargs.copy()
        kwargs1['east'] = self.xgrid[-1]
        kwargs2['west'] = self.xgrid[0]
        if not serialized(kwargs1, f'fetch_hycom_{hycom_varmap[var]}'):
            _idx(self, var, year, kwargs1)
        if not serialized(kwargs2, f'fetch_hycom_{hycom_varmap[var]}'):
            _idx(self, var, year, kwargs2)
    else:
        _idx(self, var, year, kwargs)

    return True
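fetch_idx relies on an index() helper to snap each query value (the needles) onto the corresponding grid array (the haystack). A minimal sketch of what such a helper might look like, assuming nearest-neighbour behaviour; this is not kadlu's actual implementation:

import numpy as np

def index(needle, haystack):
    # position of the grid value closest to the requested value
    return int(np.abs(np.asarray(haystack) - needle).argmin())

xgrid = np.arange(-180, 180, 0.08)
print(index(-63.5, xgrid))    # column index of the grid point nearest 63.5°W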
Example #5
File: hycom.py  Project: matt24smith/kadlu
def fetch_hycom(self, var, year, slices, kwargs):
    """ download data from hycom, prepare it, and load into database

        args:
            var: string
                variable to be fetched. complete list of variables here:
                https://tds.hycom.org/thredds/dodsC/GLBv0.08/expt_53.X/data/2015.html
            year: string
                string value between 1994 and 2016
            slices: list of tuples
                correct ordering for tuples is [epoch, depth, lat, lon].
                each tuple contains the start and end grid index of the
                dimension to be sliced. an example of the slices list:
                slices = [
                    (0, 2),         # time: start, end
                    (0, 3),         # depth: top, bottom
                    (900, 1000),    # y grid index: ymin, ymax (lat)
                    (800, 840)      # x grid index: xmin, xmax (lon)
                ]
            kwargs: dict
                keyword arguments describing the query boundaries; also used
                to build the query hash and to pass an optional lock

        note: the spatial and temporal grid arrays are read from the instance
        (self.epoch, self.depth, self.ygrid, self.xgrid) rather than passed
        as arguments; they are loaded via load_grid(), load_times() and
        load_depth() in fetch_idx()

        return: nothing
    """

    # generate request
    t1 = datetime.now()
    url = f"{hycom_src}/{year}.ascii?{slices_str(var, slices)}"
    with requests.get(url, stream=True) as payload_netcdf:
        assert payload_netcdf.status_code == 200, "couldn't access hycom server"
        meta, data = payload_netcdf.text.split(
            "---------------------------------------------\n")

    t2 = datetime.now()

    # parse response into numpy array
    arrs = data.split("\n\n")[:-1]
    shape_str, payload = arrs[0].split("\n", 1)
    shape = tuple(
        [int(x) for x in shape_str.split("[", 1)[1][:-1].split("][")])
    cube = np.ndarray(shape, dtype=float)

    for arr in payload.split("\n"):
        ix_str, row_csv = arr.split(", ", 1)
        a, b, c = [int(x) for x in ix_str[1:-1].split("][")]
        cube[a][b][c] = np.array(row_csv.split(", "), dtype=int)

    # build coordinate grid, populate with values, adjust scaling, remove nulls
    flatten = reduce(np.multiply, map(lambda s: s[1] - s[0] + 1, slices))
    add_offset = 20 if 'salinity' in var or 'water_temp' in var else 0
    null_value = -10 if 'salinity' in var or 'water_temp' in var else -30
    grid = np.array([(None, y, x, t, d, 'hycom')
                     for t in self.epoch[year][slices[0][0]:slices[0][1] + 1]
                     for d in self.depth[slices[1][0]:slices[1][1] + 1]
                     for y in self.ygrid[slices[2][0]:slices[2][1] + 1]
                     for x in self.xgrid[slices[3][0]:slices[3][1] + 1]])
    grid[:, 0] = np.reshape(cube, flatten) * 0.001 + add_offset
    grid = grid[grid[:, 0] != null_value]

    # batch database insertion ignoring duplicates
    if 'lock' in kwargs.keys(): kwargs['lock'].acquire()
    n1 = db.execute(f"SELECT COUNT(*) FROM hycom_{var}").fetchall()[0][0]
    db.executemany(
        f"INSERT OR IGNORE INTO hycom_{var} VALUES "
        "(?, ?, ?, CAST(? AS INT), CAST(? AS INT), ?)", grid)
    n2 = db.execute(f"SELECT COUNT(*) FROM hycom_{var}").fetchall()[0][0]
    db.execute("COMMIT")
    conn.commit()
    insert_hash(kwargs, f'fetch_hycom_{hycom_varmap[var]}')
    if 'lock' in kwargs.keys(): kwargs['lock'].release()

    t3 = datetime.now()

    logging.info(
        f"HYCOM {epoch_2_dt([self.epoch[year][slices[0][0]]])[0].date().isoformat()} "
        f"{var}: downloaded {int(len(payload_netcdf.content)/8/1000)} Kb "
        f"in {(t2-t1).seconds}.{str((t2-t1).microseconds)[0:3]}s. "
        f"parsed and inserted {n2 - n1} rows in "
        f"{(t3-t2).seconds}.{str((t3-t2).microseconds)[0:3]}s. "
        f"{flatten - len(grid)} null values removed, "
        f"{len(grid) - (n2 - n1)} duplicates ignored")

    return
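The size of each HYCOM request follows directly from the slices bounds: the number of grid points is the product of the inclusive index ranges, which is what the flatten value above computes before null values are stripped. A worked example using the layout from the docstring:

from functools import reduce
import numpy as np

slices = [(0, 2), (0, 3), (900, 1000), (800, 840)]   # time, depth, lat, lon
flatten = reduce(np.multiply, map(lambda s: s[1] - s[0] + 1, slices))
print(flatten)    # 3 * 4 * 101 * 41 = 49692 grid points before null removal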