Ejemplo n.º 1
0
# ---------- allocations ---------- #
# Every score array is indexed as [day, lead, location, ensemble-size index].
score_shape = (N_days, N_fcst, N_grids, N_en)
MAE = np.empty(score_shape)
# NOTE(review): SPREAD is allocated but never written or saved in this section.
SPREAD = np.empty(score_shape)
CRPS = np.empty(score_shape)

print("Computing CRPS ...")

for lead in range(N_fcst):
    print("lead = {}".format(lead))

    raw_file = REFCST_dir + "{}_{}_lead{}.hdf".format(perfix_raw, year, lead)
    smooth_file = REFCST_dir + "{}_{}_lead{}.hdf".format(perfix_smooth, year, lead)

    # Keep the first EN members, then pick the (indx, indy) locations.
    with h5py.File(raw_file, 'r') as h5io:
        RAW = h5io[key_raw][:, :EN, ...][..., indx, indy]

    with h5py.File(smooth_file, 'r') as h5io:
        SMOOTH = h5io[key_smooth][:, :EN, ...][..., indx, indy]

    # Weighted blend of the raw and smoothed ensembles.
    AnEn = W_SL*RAW + (1-W_SL)*SMOOTH

    # Score the leading `n_members` members for each requested ensemble size.
    for size_id, n_members in enumerate(EN_range):
        crps, mae, _ = metrics.CRPS_1d_nan(BCH_obs[:, lead, :],
                                           AnEn[:, :n_members, ...])
        MAE[:, lead, :, size_id] = mae
        CRPS[:, lead, :, size_id] = crps

label_save = ['MAE', 'CRPS']
tuple_save = (MAE, CRPS)
out_name = '{}_CRPS_vs_EN_{}.hdf'.format(perfix_smooth, year)
du.save_hdf5(tuple_save, label_save, save_dir, out_name)


Ejemplo n.º 2
0
# Raw forecasts indexed as [day, lead, member, grid point].
fcst_raw = np.empty((N_days, N_fcst, EN, N_grids))

for lead in range(N_fcst):
    raw_path = REFCST_dir + "{}_{}_lead{}.hdf".format(prefix_raw, year_fcst, lead)
    with h5py.File(raw_path, 'r') as h5io:
        RAW = h5io[key_raw][:, :EN, ...]
    # Keep only the grid points where land_mask_bc is False.
    RAW = RAW[..., ~land_mask_bc]

    # The same field seeds both the reference and the raw forecast arrays.
    fcst_ref[:, lead, :, :] = RAW
    fcst_raw[:, lead, :, :] = RAW

# Negative values are reset to zero in both arrays.
for fcst_array in (fcst_ref, fcst_raw):
    fcst_array[fcst_array < 0] = 0

# ---------- Schaake shuffle ---------- #
print('SimSchaake starts ...')
start_time = time.time()
fcst_ss = sim_schaake(year_analog, fcst_ref, fcst_raw, APCP, ERA5, weights)
elapsed = time.time() - start_time
print("... Completed. Time = {} sec ".format(elapsed))

# One output file per lead time, stored under the same key as the input.
for lead_out in range(N_fcst):
    label_save = [key_raw]
    tuple_save = (fcst_ss[:, lead_out, :, :], )
    out_name = '{}_SS_{}_lead{}.hdf'.format(prefix_raw, year_fcst, lead_out)
    du.save_hdf5(tuple_save, label_save, REFCST_dir, out_name)
Ejemplo n.º 3
0
        dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x1, ind=0)
        dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y1, ind=0)
    elif date.month in [3, 4, 5]:
        dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a2, ind=0)
        dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b2, ind=0)
        dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x2, ind=0)
        dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y2, ind=0)
    elif date.month in [6, 7, 8]:
        dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a3, ind=0)
        dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b3, ind=0)
        dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x3, ind=0)
        dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y3, ind=0)
    elif date.month in [9, 10, 11]:
        dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a4, ind=0)
        dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b4, ind=0)
        dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x4, ind=0)
        dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y4, ind=0)
    RESULT_a[n, ...] = dscale_a
    RESULT_b[n, ...] = dscale_b
    RESULT_x[n, ...] = dscale_x
    RESULT_y[n, ...] = dscale_y
# Add the regridded field and the four model outputs to the save list,
# keeping the same order as the labels below.
data.extend([REGRID_PCT, RESULT_a, RESULT_b, RESULT_x, RESULT_y])
label += ['PCT_REGRID', 'UNET_A', 'UNET_B', 'XNET_A', 'XNET_B']

out_name = 'PRISM_PRED_NCEP_PCT_BC_2016_2018.hdf'
du.save_hdf5(tuple(data), label, out_dir=save_dir, filename=out_name)
    # Apply the project's log transform to the regridded field and to both
    # climatology fields.
    REGRID_P = du.log_trans(REGRID_P)
    CLIM_4km_duplicate = du.log_trans(CLIM_4km_duplicate)
    CLIM_REGRID_duplicate = du.log_trans(CLIM_REGRID_duplicate)

    # Transformed values below `thres` are set to zero.
    REGRID_P[REGRID_P < thres] = 0

    # Register the four fields of this variable: array in dict_save, key in
    # label_save, both in the same order.
    fields = (
        ('{}_4km'.format(VAR), PRISM_P),
        ('{}_REGRID'.format(VAR), REGRID_P),
        ('{}_CLIM_4km'.format(VAR), CLIM_4km_duplicate),
        ('{}_CLIM_REGRID'.format(VAR), CLIM_REGRID_duplicate),
    )
    for field_key, field_data in fields:
        dict_save[field_key] = field_data
    for field_key, _ in fields:
        label_save.append(field_key)

    # Static fields stored alongside the variable: elevation and grids.
    label_etopo = ['etopo_4km', 'etopo_regrid']
    tuple_etopo = (etopo_4km, etopo_regrid)
    label_grids = ['lon_025', 'lat_025', 'lon_4km', 'lat_4km', 'land_mask']
    tuple_grids = (lon_025, lat_025, lon_4km, lat_4km, land_mask)

    # save hdf
    # NOTE(review): relies on dict_save's insertion order matching label_save.
    label_all = label_save + label_etopo + label_grids
    tuple_save = tuple(dict_save.values()) + tuple_etopo + tuple_grids
    out_name = 'PRISM_{}_features_2015_2020.hdf'.format(VAR)
    du.save_hdf5(tuple_save, label_all, out_dir=PRISM_dir, filename=out_name)

Ejemplo n.º 5
0
# =============== CRPS computation =============== #

# allocation: scores are indexed as [day, lead, location]
score_shape = (N_days, N_fcst, N_grids)
MAE = np.empty(score_shape)
CRPS = np.empty(score_shape)

for lead in range(N_fcst):
    print("computing lead: {}".format(lead))

    gefs_path = REFCST_dir + "GEFS_QM_{}_lead{}.hdf".format(year, lead)
    with h5py.File(gefs_path, 'r') as h5io:
        # Read the whole dataset, then pick the (indx, indy) locations.
        GEFS_stn = h5io['gefs_qm'][...][..., indx, indy]

    # The third return value of CRPS_1d_nan is not needed here.
    crps, mae, _ = metrics.CRPS_1d_nan(BCH_obs[:, lead, :], GEFS_stn)

    CRPS[:, lead, ...] = crps
    MAE[:, lead, ...] = mae

# save (all lead times, per year, GEFS only)
label_save = ['MAE', 'CRPS']
tuple_save = (MAE, CRPS)
out_name = 'GEFS_CRPS_BCH_{}.hdf'.format(year)
du.save_hdf5(tuple_save, label_save, save_dir, out_name)
Ejemplo n.º 6
0
print('Lapse rate correction')
# NOTE(review): 730 daily steps from 2018-01-01 end on 2019-12-31, while the
# output filename below says 2018_2020 -- confirm the intended period.
date_ref = 365 + 365
first_day = datetime(2018, 1, 1, 0)
date_list = [first_day + timedelta(days=x) for x in range(date_ref)]
# One fixed lapse-rate value per calendar month (January first); it is scaled
# by 1e-3 before being multiplied with the elevation difference.
gamma_mon = [
    -4.4, -5.9, -7.1, -7.8, -8.1, -8.2, -8.1, -8.1, -7.7, -6.8, -5.5, -4.7
]

out_shape = (date_ref, ) + lon_4km.shape
TMEAN_fix = np.zeros(out_shape)
TMEAN_correct = np.zeros(out_shape)
# Elevation difference between the 4-km terrain and the regridded terrain.
delta_etopo = etopo_4km - etopo_regrid

for i, date in enumerate(date_list):
    mon_id = date.month - 1

    # Correction using the fixed monthly lapse rate.
    TMEAN_fix[i, ...] = TMEAN_REGRID[i] + gamma_mon[mon_id] * 1e-3 * delta_etopo

    # Correction using the daily lapse-rate field interpolated to the 4-km
    # grid; the interpolated field is halved before use.
    gamma_4km = du.interp2d_wraper(lon_025, lat_025, jra_gamma[i, ...],
                                   lon_4km, lat_4km, method=interp_method)
    gamma_interp = 0.5 * gamma_4km
    TMEAN_correct[i, ...] = TMEAN_REGRID[i] + gamma_interp * delta_etopo

# Only TMEAN_correct is masked; TMEAN_fix is saved unmasked.
TMEAN_correct[:, land_mask] = np.nan

label_save = ['lon_4km', 'lat_4km', 'TMEAN_correct', 'TMEAN_fix', 'land_mask']
data_save = (lon_4km, lat_4km, TMEAN_correct, TMEAN_fix, land_mask)
du.save_hdf5(data_save, label_save, JRA_dir, 'JRA_TMEAN_correct_2018_2020.hdf')
Ejemplo n.º 7
0
    # import 3d (time, lat, lon) features
    # The context manager closes the file even if a dataset read raises.
    feature_file = PRISM_dir + 'PRISM_{}_features_2015_2020.hdf'.format(var)
    with h5py.File(feature_file, 'r') as hdf_io:
        PRISM_T = hdf_io['{}_4km'.format(var)][ind_pred, ...]
        REGRID_T = hdf_io['{}_REGRID'.format(var)][ind_pred, ...]

    # import pre-trained models (import together for saving time)
    # UNET: one model per season, keyed by season tag.
    unet = {}
    for season in ('djf', 'mam', 'jja', 'son'):
        unet[season] = keras.models.load_model(
            model_import_dir + 'UNET3_{}_{}.hdf'.format(var, season))

    # Calendar month -> season tag of the model used for that month.
    month_to_season = {
        12: 'djf', 1: 'djf', 2: 'djf',
        3: 'mam', 4: 'mam', 5: 'mam',
        6: 'jja', 7: 'jja', 8: 'jja',
        9: 'son', 10: 'son', 11: 'son',
    }

    for n, date in enumerate(pred_list):
        # Model inputs: the regridded field of this date plus both elevations.
        X = (REGRID_T[n, ...], etopo_4km, etopo_regrid)
        print(date)
        season_model = unet[month_to_season[date.month]]
        temp_unet = vu.pred_domain(X, land_mask, season_model, param, method='norm_std')

        RESULT_UNET[n, ...] = temp_unet

    tuple_save = (lon_4km, lat_4km, PRISM_T, REGRID_T, RESULT_UNET)
    label_save = ['lon_4km', 'lat_4km', '{}_4km'.format(var), '{}_REGRID'.format(var), 'RESULT_UNET']
    du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename='PRISM_PRED_{}_test.hdf'.format(var))
# Collect [row, col] index pairs in row-major order:
#   IND      -- grid points where bc_in_base is False
#   IND_base -- grid points where land_mask is False
IND_base = []
for i in range(grid_shape[0]):
    for j in range(grid_shape[1]):
        # `not` is a logical negation for boolean and 0/1 masks alike;
        # bitwise `~` on an integer element is always truthy.
        if not bc_in_base[i, j]:
            IND.append([i, j])
        if not land_mask[i, j]:
            IND_base.append([i, j])
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24, so the builtin is used directly.
IND = np.array(IND, dtype=int)
IND_base = np.array(IND_base, dtype=int)

# ---------------------------------- #
# the main loop
print("Main program starts ...")
start_time = time.time()
OUT = SL_search(
    IND,
    IND_base,
    era_3hq,
    Z,
    facet_h,
    facet_m,
    facet_l,
    W_facet,
)
print("{} secs for all locs".format(time.time() - start_time))
# ---------------------------------- #
# save
# The search result is stored under the key 'IND'.
tuple_save = (OUT, )
label_save = ['IND']
du.save_hdf5(tuple_save, label_save, save_dir, 'S40_mon{}.hdf'.format(month))
Ejemplo n.º 9
0
    # Flag every (lead, initialization date) pair whose valid time falls in
    # one of the months listed in `month_around` (0-based month numbers).
    for lead in range(N_fcst):
        for d, date in enumerate(date_list):
            # ini date + lead time
            date_true = date + timedelta(hours=FCSTs[lead])

            in_window = (date_true.month - 1) in month_around
            flag_pick[mon, lead, d] = 1.0 if in_window else 0.0

# Monthly climatological mean, indexed as [month, lead, location].
MEAN = np.empty((12, N_fcst, N_grids))

for lead in range(N_fcst):
    MEAN[:, lead, :] = climo_mean(ERA5_obs[:, lead, :], flag_pick[:, lead, :])

# ---------- Duplicate to 2016-2020 ---------- #

# 366 + 3 * 365 daily steps starting on 2016-01-01.
N_days = 366 + 365*3
date_base = datetime(2016, 1, 1)
# Integer day offsets give the same dates as the former float offsets and
# avoid `np.float`, which was removed in NumPy 1.24.
date_list = [date_base + timedelta(days=x) for x in range(N_days)]

MEAN_BCH = np.empty((N_days, N_fcst, N_grids))

for i, date in enumerate(date_list):
    # Use the month of *this* date. The previous code indexed MEAN with a
    # stale `mon` left over from an earlier loop, so every day received the
    # same month's climatology.
    mon_ = date.month-1
    MEAN_BCH[i, :, :] = MEAN[mon_, ...]

tuple_save = (MEAN_BCH,)
label_save = ['MEAN_BCH']
du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_climo-mean_ERA5.hdf')
Ejemplo n.º 10
0
param['size'] = 96

# loop over variables and seasons
VARS = ['TMEAN']

for VAR in VARS:
    # import 3d (time, lat, lon) features
    feature_file = JRA_dir + 'JRA_{}_features_2015_2020.hdf'.format(VAR)
    with h5py.File(feature_file, 'r') as hdf_io:
        REGRID_T = hdf_io['{}_REGRID'.format(VAR)][ind_pred, ...]

    shape_3d = REGRID_T.shape
    n_steps = shape_3d[0]
    RESULT_CLEAN = np.zeros(shape_3d)
    RESULT_025 = np.zeros((n_steps,)+lon_025.shape)

    for n in range(n_steps):
        print('\t{}'.format(n))
        X = (REGRID_T[n, ...], etopo_regrid)
        temp_unet = vu.pred_domain(X, land_mask, CGAN, param, method='norm_std')
        # Interpolate the prediction to the (lon_025, lat_025) grid and back
        # to the 4-km grid; both versions are kept.
        temp_025 = du.interp2d_wraper(lon_4km, lat_4km, temp_unet,
                                      lon_025, lat_025, method=interp_method)
        temp_4km = du.interp2d_wraper(lon_025, lat_025, temp_025,
                                      lon_4km, lat_4km, method=interp_method)
        RESULT_CLEAN[n, ...] = temp_4km
        RESULT_025[n, ...] = temp_025

    # Masked grid points are NaN in the 4-km result only.
    RESULT_CLEAN[:, land_mask] = np.nan

    label_save = ['lon_4km', 'lat_4km', '{}_REGRID'.format(VAR),
                  '{}_025'.format(VAR), 'etopo_4km', 'etopo_regrid']
    tuple_save = (lon_4km, lat_4km, RESULT_CLEAN, RESULT_025, etopo_4km, etopo_regrid)
    out_name = 'JRA_{}_clean_{}.hdf'.format(VAR, year)
    du.save_hdf5(tuple_save, label_save, out_dir=JRA_dir, filename=out_name)
    
Ejemplo n.º 11
0
            CLIM_elev[i, en, ..., 1] = etopo_025

    # Read the 'AnEn' dataset of this lead time, keeping the first N_sample
    # samples and the first EN members.
    with h5py.File(
            REFCST_dir +
            "{}_final_dress_SS_{}_lead{}.hdf".format(TYPE, year, lead),
            'r') as h5io:
        H15_SL = h5io['AnEn'][:N_sample, :EN, ...]

    # noisy AnEn preprocess
    # Negatives are clipped to zero and log(x + 1) is applied; the inverse
    # (exp(x) - 1) is applied to the model output further down.
    H15_SL[H15_SL < 0] = 0
    H15_SL = np.log(H15_SL + 1)
    # Scatter the values into RAW where land_mask_bc is False and write zeros
    # where it is True.
    RAW[..., ~land_mask_bc] = H15_SL
    RAW[..., land_mask_bc] = 0

    for i in range(N_sample):
        # Stack the forecast (as a new trailing axis) with CLIM_elev along the
        # last axis to form the model input.
        X = np.concatenate((RAW[i, ..., None], CLIM_elev[i, ...]), axis=-1)
        temp_ = model.predict([X])
        # Keep index 0 on the last axis of the final element of the prediction.
        cgan_raw[i, ...] = temp_[-1][..., 0]

    # Clip negative outputs before inverting the log transform.
    cgan_raw[cgan_raw < 0] = 0
    cgan_raw = np.exp(cgan_raw) - 1  # <--- de-normalized

    # Positions where land_mask_bc is True are stored as NaN.
    cgan_raw[..., land_mask_bc] = np.nan

    tuple_save = (cgan_raw, )
    label_save = [
        'cnn_pred',
    ]
    du.save_hdf5(tuple_save, label_save, REFCST_dir,
                 '{}_CNN_{}_lead{}.hdf'.format(TYPE, year, lead))
        count += L_day

    print('Interpolation')
    for i in range(days_ref):
        # Progress message every 200 days.
        if not i % 200:
            print('\tday index: {}'.format(i))
        temp_interp = du.interp2d_wraper(lon_025, lat_025, jra_var[i, ...],
                                         lon_4km, lat_4km,
                                         method=interp_method)
        # land mask applied
        temp_interp[land_mask] = np.nan
        jra_interp[i, ...] = temp_interp

    print('Feature engineering')
    if VAR in ('TMAX', 'TMIN', 'TMEAN'):
        print('\tK to C')
        # NOTE(review): jra_var is converted too, but only jra_interp is
        # saved below.
        jra_var = jra_var - 273.15
        jra_interp = jra_interp - 273.15

    label_save = ['lon_4km', 'lat_4km', '{}_REGRID'.format(VAR), 'TMEAN_4km',
                  'etopo_regrid', 'land_mask']
    data_save = (lon_4km, lat_4km, jra_interp, TMEAN_4km, etopo_regrid,
                 land_mask)
    out_name = 'JRA_{}_features_US_2015_2020.hdf'.format(VAR)
    du.save_hdf5(data_save, label_save, JRA_dir, out_name)
            X_3d_valid = (T_REGRID[ind_valid, ...][ind_valid_temp, ...],
                          T_CLIM_4km[ind_valid, ...][ind_valid_temp, ...])
            I, C, OUT = vu.baseline_estimator(X_3d_train, X_2d, Y, X_3d_valid,
                                              X_2d, land_mask)
            OUT_with_clim[ind_valid_temp, ...] = OUT
            C_with_clim[..., N_with_clim * i:N_with_clim * (i + 1)] = C
            I_with_clim[..., i] = I

    # save as hdf
    # The "no climatology" results are always written; the "with climatology"
    # results are added only when `clim` is truthy. Both cases use the same
    # output filename.
    tuple_save = (lon_4km, lat_4km, C_no_clim, I_no_clim, OUT_no_clim)
    label_save = [
        'lon_4km', 'lat_4km', 'C_no_clim', 'I_no_clim', 'OUT_no_clim'
    ]
    if clim:
        tuple_save = tuple_save + (C_with_clim, I_with_clim, OUT_with_clim)
        label_save = label_save + [
            'C_with_clim', 'I_with_clim', 'OUT_with_clim'
        ]
    out_name = 'BASELINE_PRISM_{}_2018_2020.hdf'.format(var)
    du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename=out_name)
Ejemplo n.º 14
0
    # Put the flattened AnEn members back onto the 2-d grid.
    # NOTE: an optional smoothing pass (ana.sg2d, window 9, order 3, applied
    # twice) and its 'AnEn_SG' output used to follow this step and is disabled.
    AnEn_grid = np.empty((L_fcst_days, EN) + bc_shape)

    for day_id in range(L_fcst_days):
        for member_id in range(EN):
            # Fill the grid points where land_mask_bc is False with this
            # member's values.
            AnEn_grid[day_id, member_id, ~land_mask_bc] = AnEn[day_id, ..., member_id]

    # The remaining (masked) grid points hold uninitialised memory; set to NaN.
    AnEn_grid[..., land_mask_bc] = np.nan

    label_save = ['AnEn']
    tuple_save = (AnEn_grid, )
    out_name = 'BASE_final_{}_lead{}.hdf'.format(year_fcst, lead)
    du.save_hdf5(tuple_save, label_save, REFCST_dir, out_name)
Ejemplo n.º 15
0
                clim_4km[i,
                         ...] = CLIM[var][mon_id, ...] / mon_days_366[mon_id]
                clim_regrid[i,
                            ...] = CLIM_REGRID[var][mon_id,
                                                    ...] / mon_days_366[mon_id]
            else:
                clim_4km[i,
                         ...] = CLIM[var][mon_id, ...] / mon_days_365[mon_id]
                clim_regrid[i,
                            ...] = CLIM_REGRID[var][mon_id,
                                                    ...] / mon_days_365[mon_id]

        clim_4km = du.log_trans(clim_4km)
        clim_regrid = du.log_trans(clim_regrid)
        clim_regrid[clim_regrid < thres] = 0

    # save data
    # The NCEP grids and fields are cropped to the sub-domain given by
    # domain_ind = (row start, row stop, column start, column stop).
    rows = slice(domain_ind[0], domain_ind[1])
    cols = slice(domain_ind[2], domain_ind[3])

    # NOTE(review): var_4km is stored under the '<var>_REGRID' label --
    # confirm that this pairing is intended.
    data_save = (
        lon_4km,
        lat_4km,
        lon_ncep[rows, cols],
        lat_ncep[rows, cols],
        var_4km,
        clim_4km,
        clim_regrid,
        ncep_var[:, rows, cols],
        etopo_4km,
        etopo_regrid,
        land_mask,
    )
    label_save = [
        'lon_4km',
        'lat_4km',
        'lon_ncep',
        'lat_ncep',
        '{}_REGRID'.format(var),
        '{}_CLIM_4km'.format(var),
        '{}_CLIM_REGRID'.format(var),
        '{}_originals'.format(var),
        'etopo_4km',
        'etopo_regrid',
        'land_mask',
    ]
    out_name = 'NCEP_{}_features_BC_2016_2020.hdf'.format(var)
    du.save_hdf5(data_save, label_save, NCEP_dir, out_name)
                                       unet['djf'],
                                       param,
                                       method='norm_std')
        elif date.month in [3, 4, 5]:
            temp_unet = vu.pred_domain(X,
                                       land_mask,
                                       unet['mam'],
                                       param,
                                       method='norm_std')
        elif date.month in [6, 7, 8]:
            temp_unet = vu.pred_domain(X,
                                       land_mask,
                                       unet['jja'],
                                       param,
                                       method='norm_std')
        elif date.month in [9, 10, 11]:
            temp_unet = vu.pred_domain(X,
                                       land_mask,
                                       unet['son'],
                                       param,
                                       method='norm_std')

        RESULT_UNET[n, ...] = temp_unet

    # Save the grids, the regridded input and the model prediction.
    label_save = ['lon_4km', 'lat_4km', '{}_REGRID'.format(var), 'RESULT_UNET']
    tuple_save = (lon_4km, lat_4km, REGRID_T, RESULT_UNET)
    out_name = 'ERA_PRED_{}_2018_2020.hdf'.format(var)
    du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename=out_name)
Ejemplo n.º 17
0
                                  bc_inds[2]:bc_inds[3]]

    era_pct_daily = np.sum(np.reshape(era_pct, (N_days, 8, 48, 112)), axis=1)
    ERA5 += (era_pct_daily[..., indx, indy], )

# Join the per-iteration ERA5 pieces along the first axis.
ERA5_obs = np.concatenate(ERA5, axis=0)

# ========== BCH ========== #

# The store is opened once for all stations; it was previously reopened for
# every station key, which repeated the same file open/close per iteration.
OBS = ()
with pd.HDFStore(BACKUP_dir + 'BCH_PREC_QC_1D_2016_2020.hdf',
                 'r') as hdf_io:
    for key in stn_code:
        pd_temp = hdf_io[key]
        # Index by the 'datetime' column so that rows can be sliced by date.
        pd_temp.index = pd_temp['datetime']
        pd_temp = pd_temp['2016-01-01':'2019-12-31']
        obs_ = pd_temp['PREC_HOUR_QC'].values
        # One column per station.
        OBS += (obs_[:, None], )

BCH_obs = np.concatenate(OBS, axis=-1)

# ! <--- data cleaning
# Values above 300 are treated as invalid.
BCH_obs[BCH_obs > 300] = np.nan

# Station-by-station cleaning with the project helper.
for i in range(len(stn_code)):
    BCH_obs[:, i] = bu.clean_no_response(BCH_obs[:, i], tol_window=60 * 8)

# flag_pick is stored under the key 'stn_flag'.
tuple_save = (ERA5_obs, BCH_obs, flag_pick)
label_save = ['ERA5_obs', 'BCH_obs', 'stn_flag']
du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_ERA5_1D_pairs.hdf')
Ejemplo n.º 18
0
        # Keep only the pairs selected by flag_nonan; L is their count.
        obs = obs[flag_nonan]
        fcst = fcst[flag_nonan]
        L = np.sum(flag_nonan)

        # Mean of the retained observations for this index d.
        o_bar_ = np.mean(obs)

        o_bar[d] = o_bar_

        # Bootstrap: N_boost resamples of the (obs, fcst) pairs, drawn with
        # replacement and each of size L.
        for n in range(N_boost):

            ind_bagging = np.random.choice(L, size=L, replace=True)
            obs_ = obs[ind_bagging]
            fcst_ = fcst[ind_bagging]

            # Reliability-diagram values and Brier score of this resample.
            prob_true_, prob_pred_ = reliability_diagram(
                obs_, fcst_, hist_bins)
            brier_ = brier_score_loss(obs_, fcst_)

            prob_true[d, :, n] = prob_true_
            prob_pred[d, :, n] = prob_pred_
            brier[d, n] = brier_

        # Histogram of the (non-resampled) forecasts, using the resample-mean
        # of prob_pred as bin edges with 1.0 appended as the last edge.
        # NOTE(review): np.histogram needs monotonically increasing edges --
        # confirm that the averaged values always satisfy this.
        hist_bins_ = np.mean(prob_pred[d, ...], axis=1)
        use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0]))
        use[d, :] = use_

    # prob_true / prob_pred are stored under 'pos_frac' / 'pred_value'.
    tuple_save = (brier, prob_true, prob_pred, use, o_bar)
    label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar']
    du.save_hdf5(tuple_save, label_save, save_dir,
                 '{}_Calib_loc{}.hdf'.format(prefix_out, r))
Ejemplo n.º 19
0
    # Valid-data mask and grid size come from the dataset opened above.
    data_mask = np.squeeze(temp_io.dataset_mask())
    N_lon = temp_io.width
    N_lat = temp_io.height
    temp_io.close()
    # Coordinate grids built from the lower bounds and the grid spacing.
    lon, lat = np.meshgrid(np.arange(bounds[0], bounds[0] + dx * N_lon, dx),
                           np.arange(bounds[1], bounds[1] + dy * N_lat, dy))

    # loop over files
    PRISM_PCT = np.empty((len(file_dir), N_lat, N_lon))
    datenum = []
    for i, temp_dir in enumerate(file_dir):
        # First '*.bil' match for this entry.
        temp_file = glob(temp_dir + '*.bil')[0]
        # The context manager closes the dataset even if the read fails.
        with rasterio.open(temp_file, 'r') as temp_io:
            temp_data = np.squeeze(temp_io.read())
        # vals
        # Cells flagged 0 in the dataset mask become NaN; rows are then
        # flipped top-to-bottom.
        temp_data[data_mask == 0] = np.nan
        temp_data = np.flipud(temp_data)
        PRISM_PCT[i, ...] = temp_data
        # datenum
        date_str = basename(temp_file)[-16:-8]  # get date string from filename
        datenum += du.dt_to_sec([datetime.strptime(date_str, '%Y%m%d')])
    # save hdf5
    tuple_save = (PRISM_PCT, np.array(datenum), lon, lat)
    label_save = ['PRISM_{}'.format(var), 'datenum', 'lon', 'lat']
    du.save_hdf5(tuple_save,
                 label_save,
                 out_dir=PRISM_dir,
                 filename='PRISM_{}_2015_2020.hdf'.format(var))
Ejemplo n.º 20
0
        prism_025[i, ...] = temp_025
        prism_regrid[i, ...] = temp_regrid

    # collecting fields
    # Each dictionary keeps this variable's array under the key VAR.
    dict_4km[VAR] = prism_4km
    dict_025[VAR] = prism_025
    dict_regrid[VAR] = prism_regrid
    # collecting label
    # Dataset names are the variable name plus a suffix for the grid.
    label_4km.append(VAR + '_4km')
    label_025.append(VAR + '_025')
    label_regrid.append(VAR + '_REGRID')

# Static fields: elevation on three grids, coordinates, and the land mask.
label_etopo = ['etopo_4km', 'etopo_025', 'etopo_regrid']
tuple_etopo = (etopo_4km, etopo_025, etopo_regrid)
label_grids = ['lon_025', 'lat_025', 'lon_4km', 'lat_4km', 'land_mask']
tuple_grids = (lon_025, lat_025, lon_4km, lat_4km, land_mask)

# dictionary to tuple
# NOTE(review): relies on each dictionary's insertion order matching the
# order of the corresponding label list.
tuple_4km = tuple(dict_4km.values())
tuple_025 = tuple(dict_025.values())
tuple_regrid = tuple(dict_regrid.values())

# save hdf: variable fields first, then elevation, then grids
label_save = label_4km + label_025 + label_regrid + label_etopo + label_grids
tuple_save = tuple_4km + tuple_025 + tuple_regrid + tuple_etopo + tuple_grids
du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir,
             filename='PRISM_regrid_clim.hdf')
Ejemplo n.º 21
0
    land_mask_025 = hdf_io['land_mask_025'][...]
    land_mask_terrain_025 = hdf_io['land_mask_terrain_025'][...]

    land_mask = hdf_io['land_mask'][...]
    land_mask_terrain_4km = hdf_io['land_mask_terrain'][...]

    etopo_4km = hdf_io['etopo_4km'][...]
    etopo_regrid = hdf_io['etopo_regrid'][...]

# Force the domain edges of both 0.25-degree masks to True: columns from 155
# onward, the first five rows, and all rows from a mask-specific index onward.
# NOTE(review): the row cut-off differs between the two masks (152 vs 145) --
# confirm that this is intended.
for edge_mask, first_masked_row in ((land_mask_025, 152),
                                    (land_mask_terrain_025, 145)):
    edge_mask[:, 155:] = True
    edge_mask[first_masked_row:, :] = True
    edge_mask[:5, :] = True

# save
label_save = [
    'lon_4km', 'lat_4km', 'lon_025', 'lat_025', 'etopo_4km', 'etopo_regrid',
    'land_mask', 'land_mask_025', 'land_mask_terrain', 'land_mask_terrain_025'
]
tuple_save = (lon_4km, lat_4km, lon_025, lat_025, etopo_4km, etopo_regrid,
              land_mask, land_mask_025, land_mask_terrain_4km,
              land_mask_terrain_025)
du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir,
             filename='land_mask_NA.hdf')
        with nc.Dataset(filenames[VAR][file_num], 'r') as nc_io:
            temp_var[j, ...] = np.squeeze(nc_io.variables[nc_keys[VAR]][...])
    era_sfp[i, ...] = f(temp_var, axis=0)

print('Lapse rate with linear regression')
# NOTE(review): `sorter` (indices 19..0) is not used in the code visible here.
sorter = np.array(
    [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

# For every day and grid point: derive level pressures from the era_sfp value,
# convert them to heights, and fit a slope of temperature against height.
for i in range(days_ref):
    print(i)
    for j in range(grid_shape[0]):
        for k in range(grid_shape[1]):
            # Level values computed as A + B * era_sfp; passed on below as Pa.
            temp_lev = np.array(A + B * era_sfp[i, j, k])
            # Standard-atmosphere height for each level, scaled by 1000
            # (presumably km -> m; confirm against the metpy version in use).
            temp_height = 1000 * metpy.calc.pressure_to_height_std(
                temp_lev * units.Pa).__array__()
            temp_temp = era_var[i, :, j, k]
            era_height[i, :, j, k] = temp_height
            era_lev[i, :, j, k] = temp_lev
            # Select the levels between 1500 and 2500 above the first level's
            # height.
            # NOTE(review): np.searchsorted assumes temp_height is ascending.
            ind1 = np.searchsorted(temp_height, temp_height[0] + 1500)
            ind2 = np.searchsorted(temp_height, temp_height[0] + 2500)
            # Slope of temperature against height within that layer.
            era_gamma[i, j, k] = linear_slope(temp_height[ind1:ind2],
                                              temp_temp[ind1:ind2])

data_save = (lon_025, lat_025, era_var, era_sfp, era_gamma, era_height,
             era_lev)
label_save = [
    'lon_025', 'lat_025', 'era_var', 'era_sfp', 'era_gamma', 'era_height',
    'era_lev'
]
du.save_hdf5(data_save, label_save, ERA_dir, 'ERA_TMEAN_GAMMA_2018_2020.hdf')
Ejemplo n.º 23
0
        # Keep only the pairs selected by flag_nonan; L is their count.
        obs = obs[flag_nonan]
        fcst = fcst[flag_nonan]
        L = np.sum(flag_nonan)

        # Mean of the retained observations for this index d.
        o_bar_ = np.mean(obs)

        o_bar[d] = o_bar_

        # Bootstrap: N_boost resamples of the (obs, fcst) pairs, drawn with
        # replacement and each of size L.
        for n in range(N_boost):

            ind_bagging = np.random.choice(L, size=L, replace=True)
            obs_ = obs[ind_bagging]
            fcst_ = fcst[ind_bagging]

            # Reliability-diagram values and Brier score of this resample.
            prob_true_, prob_pred_ = reliability_diagram(
                obs_, fcst_, hist_bins)
            brier_ = brier_score_loss(obs_, fcst_)

            prob_true[d, :, n] = prob_true_
            prob_pred[d, :, n] = prob_pred_
            brier[d, n] = brier_

        # Histogram of the (non-resampled) forecasts, using the resample-mean
        # of prob_pred as bin edges with 1.0 appended as the last edge.
        # NOTE(review): np.histogram needs monotonically increasing edges --
        # confirm that the averaged values always satisfy this.
        hist_bins_ = np.mean(prob_pred[d, ...], axis=1)
        use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0]))
        use[d, :] = use_

    # prob_true / prob_pred are stored under 'pos_frac' / 'pred_value'.
    tuple_save = (brier, prob_true, prob_pred, use, o_bar)
    label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar']
    du.save_hdf5(tuple_save, label_save, save_dir,
                 'GEFS_99th_Calib_loc{}.hdf'.format(r))
Ejemplo n.º 24
0
    with h5py.File(ERA_dir + 'PT_3hr_{}.hdf'.format(year), 'r') as h5io:
        era_pct = h5io['era_025'][...]
    PCT_history['{}'.format(year)] = era_pct

# For each initialization year, build an array indexed as
# [initialization day, lead, ...] by looking up the field valid at
# initialization time + lead in the per-year history.
for year in range(2000, 2020):
    print("Processing year: {}".format(year))
    N_days = (datetime(year + 1, 1, 1) - datetime(year, 1, 1)).days
    ERA_fcst = np.zeros((N_days, N_fcst, 160, 220))

    for day in range(N_days):
        for t, fcst_temp in enumerate(FCSTs):
            # fcsted (targeted) date
            date_true = (datetime(year, 1, 1) + timedelta(days=day)
                         + timedelta(hours=fcst_temp))
            # handling cross years: the valid time can fall in another year,
            # so the lookup uses the valid time's own year.
            year_true = int(date_true.year)
            seconds_into_year = (
                date_true - datetime(year_true, 1, 1)).total_seconds()
            # Time-step index within that year; `freq` is the step in hours.
            ind_true = int(seconds_into_year / 60 / 60 / freq)
            history = PCT_history['{}'.format(year_true)]
            ERA_fcst[day, t, ...] = history[ind_true, ...]

    # Values below 1e-5 are set to exactly zero.
    ERA_fcst[ERA_fcst < 1e-5] = 0.0

    label_save = ['era_fcst']
    tuple_save = (ERA_fcst, )
    filename = 'ERA5_GEFS-fcst_{}.hdf'.format(year)

    du.save_hdf5(tuple_save, label_save, ERA_dir, filename)
Ejemplo n.º 25
0
    # Mean of the binary observations.
    o_bar = np.mean(BCH_binary)

    # Bin edges per method: mean of prob_pred over axis 1.
    hist_bins_base = np.mean(prob_pred_base, axis=1)
    hist_bins_bcnn = np.mean(prob_pred_bcnn, axis=1)
    hist_bins_sl = np.mean(prob_pred_sl, axis=1)
    hist_bins_scnn = np.mean(prob_pred_scnn, axis=1)

    # Forecast-usage histograms; 1.0 is appended as the last bin edge.
    edges_base = np.array(list(hist_bins_base) + [1.0])
    edges_bcnn = np.array(list(hist_bins_bcnn) + [1.0])
    edges_sl = np.array(list(hist_bins_sl) + [1.0])
    edges_scnn = np.array(list(hist_bins_scnn) + [1.0])

    use_base, _ = np.histogram(BASE_prob, bins=edges_base)
    use_bcnn, _ = np.histogram(BCNN_prob, bins=edges_bcnn)
    use_sl, _ = np.histogram(SL_prob, bins=edges_sl)
    use_scnn, _ = np.histogram(SCNN_prob, bins=edges_scnn)

    # Labels and arrays are listed in the same order.
    label_save = [
        'prob_true_base', 'prob_true_bcnn', 'prob_true_sl', 'prob_true_scnn',
        'prob_pred_base', 'prob_pred_bcnn', 'prob_pred_sl', 'prob_pred_scnn',
        'brier_base', 'brier_bcnn', 'brier_sl', 'brier_scnn',
        'use_base', 'use_bcnn', 'use_sl', 'use_scnn',
        'o_bar',
    ]
    tuple_save = (
        prob_true_base, prob_true_bcnn, prob_true_sl, prob_true_scnn,
        prob_pred_base, prob_pred_bcnn, prob_pred_sl, prob_pred_scnn,
        brier_base, brier_bcnn, brier_sl, brier_scnn,
        use_base, use_bcnn, use_sl, use_scnn,
        o_bar,
    )

    out_name = 'Accum_Calib_lead{}_loc{}.hdf'.format(lead_, r)
    du.save_hdf5(tuple_save, label_save, save_dir, out_name)
# Select the climatology entries picked by flag_pick.
MEAN = MEAN_BCH[flag_pick, ...]

# =============== Allocation and MAE computation =============== #

# N_days: 366 when the year is divisible by four, otherwise 365
N_days = 366 if year % 4 == 0 else 365

# other params
N_fcst = 54
EN = 75
N_grids = BCH_obs.shape[-1]

MAE = np.empty((N_days, N_fcst, N_grids))

print("Computing MAE ...")

for lead in range(N_fcst):
    print("lead = {}".format(lead))
    # Absolute difference between observation and climatological mean.
    abs_error = np.abs(BCH_obs[:, lead, :] - MEAN[:, lead, :])
    MAE[:, lead, ...] = abs_error

label_save = ['MAE']
tuple_save = (MAE, )
out_name = 'CLIM_MAE_BCH_{}.hdf'.format(year)
du.save_hdf5(tuple_save, label_save, save_dir, out_name)
Ejemplo n.º 27
0
# loop over variables and seasons
VARS = ['TMEAN']

for VAR in VARS:

    # import 3d (time, lat, lon) features
    feature_file = JRA_dir + 'JRA_{}_features_US_2015_2020.hdf'.format(VAR)
    with h5py.File(feature_file, 'r') as hdf_io:
        PRISM_T = hdf_io['{}_4km'.format(VAR)][...]
        REGRID_T = hdf_io['{}_REGRID'.format(VAR)][...]

    shape_3d = REGRID_T.shape
    n_steps = shape_3d[0]
    RESULT_CLEAN = np.zeros(shape_3d)
    RESULT_025 = np.zeros((n_steps,)+lon_025.shape)

    for n in range(n_steps):
        print('\t{}'.format(n))
        X = (REGRID_T[n, ...], etopo_regrid)
        temp_unet = vu.pred_domain(X, land_mask, CGAN, param, method='norm_std')
        # Interpolate the prediction to the (lon_025, lat_025) grid and back
        # to the 4-km grid; both versions are kept.
        temp_025 = du.interp2d_wraper(lon_4km, lat_4km, temp_unet,
                                      lon_025, lat_025, method=interp_method)
        temp_4km = du.interp2d_wraper(lon_025, lat_025, temp_025,
                                      lon_4km, lat_4km, method=interp_method)

        RESULT_CLEAN[n, ...] = temp_4km
        RESULT_025[n, ...] = temp_025

    # Masked grid points are NaN in the 4-km result only.
    RESULT_CLEAN[:, land_mask] = np.nan

    label_save = ['lon_4km', 'lat_4km', '{}_4km'.format(VAR),
                  '{}_REGRID'.format(VAR), '{}_025'.format(VAR),
                  'etopo_4km', 'etopo_regrid']
    tuple_save = (lon_4km, lat_4km, PRISM_T, RESULT_CLEAN, RESULT_025,
                  etopo_4km, etopo_regrid)
    out_name = 'JRA_US_{}_clean_2015_2020.hdf'.format(VAR)
    du.save_hdf5(tuple_save, label_save, out_dir=JRA_dir, filename=out_name)
    
Ejemplo n.º 28
0
# ========== Region subsets ========== #

# Bounding boxes passed to du.check_bounds.
south = [-130, -121, 48.75, 50.25]
north = [-127.5, -110, 53, 60]

# Region id per station: 1 south, 3 north, 2 for everything else.
# (Id 0, "van isl", is never assigned in this section.)
loc_id = []

for i in range(len(stn_code)):
    stn_lon_temp, stn_lat_temp = stn_lon[i], stn_lat[i]

    # The south box is tested first, so it takes precedence over north.
    if du.check_bounds(stn_lon_temp, stn_lat_temp, south):
        region = 1
    elif du.check_bounds(stn_lon_temp, stn_lat_temp, north):
        region = 3
    else:
        region = 2
    loc_id.append(region)

loc_id = np.array(loc_id)

# Boolean membership flag per region.
flag_sw = loc_id == 1
flag_si = loc_id == 2
flag_n = loc_id == 3

# ========== Save ========== #

label_save = ['stn_lon', 'stn_lat', 'flag_sw', 'flag_si', 'flag_n']
tuple_save = (stn_lon, stn_lat, flag_sw, flag_si, flag_n)
du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_wshed_groups.hdf')
Ejemplo n.º 29
0
        # id 0 and 1 are flattened grid points, reshape them to 2d.
        AnEn_full[..., ~land_mask_bc] = AnEn_
        AnEn_stn = AnEn_full[..., indx, indy]
    else:
        AnEn_stn = AnEn_[..., indx, indy]
        # cnn outputs can be negative, fix it here.
        AnEn_stn = ana.cnn_precip_fix(AnEn_stn)

    # extracting the 90-th threshold for initialization time + lead time
    for mon in range(12):
        # Rows whose flag_pick entry at this lead equals the month index.
        flag_ = flag_pick[:, lead] == mon
        # stn obs
        obs_ = BCH_obs[flag_, lead, :]
        # fcst
        pred_ = AnEn_stn[flag_, ...]
        # station-wise threshold
        thres_ = BCH_90th[mon, :]

        # Brier Score ( ">=" is applied)
        # The exceedance flags are cast to float BEFORE missing observations
        # are marked. Writing NaN into a boolean array stores True, which
        # would turn every missing observation into an observed event.
        obs_flag = (obs_ >= thres_).astype(float)
        obs_flag[np.isnan(obs_)] = np.nan
        pred_flag = pred_ >= thres_

        BS[flag_, lead, :] = metrics.BS_binary_1d_nan(obs_flag, pred_flag)

# save (all lead times, per year, per experiment)
# The thresholds are stored with the scores, under the key 'stn_90th'.
label_save = ['BS', 'stn_90th']
tuple_save = (BS, BCH_90th)
out_name = '{}_BS_BCH_{}.hdf'.format(prefix_out, year)
du.save_hdf5(tuple_save, label_save, save_dir, out_name)
Ejemplo n.º 30
0
# Three terrain fields with masked grid points set to zero: Z_l (prepared
# above), Z_m (Gaussian-filtered elevation) and Z_h (unfiltered copy).
Z_l[land_mask] = 0
Z_m = gaussian_filter(etopo_025, 5 / np.pi)
Z_m[land_mask] = 0
Z_h = np.copy(etopo_025)
Z_h[land_mask] = 0

# Facets of each terrain field.
facet_h = facet(Z_h)
facet_m = facet(Z_m)
facet_l = facet(Z_l)

# Masked grid points carry no facet.
for facet_field in (facet_h, facet_m, facet_l):
    facet_field[land_mask] = np.nan

# Windowed standard deviation of the elevation, scaled by its maximum.
sigma_facet = window_stdev_slow(etopo_025, radius=5)
W_facet = sigma_facet / np.nanmax(sigma_facet)

# Weight derived from sigma025, limited to the range [0.2, 0.8]; masked grid
# points are NaN. (An earlier variant used a factor and upper limit of 0.6.)
W_025 = np.clip(0.8 * (sigma025 - 15), 0.2, 0.8)
W_025[land_mask] = np.nan

# W_025 is stored under the key 'W_SL'.
label_save = ['facet_h', 'facet_m', 'facet_l', 'W_facet', 'W_SL']
tuple_save = (facet_h, facet_m, facet_l, W_facet, W_025)
du.save_hdf5(tuple_save, label_save, save_dir, 'NA_SL_info.hdf')