# ---------- allocations ---------- #
# Score buffers; the loops below fill [:, lead, :, i] for every lead time and
# every entry i of EN_range, so no element is left uninitialised.
MAE = np.empty((N_days, N_fcst, N_grids, N_en))
CRPS = np.empty((N_days, N_fcst, N_grids, N_en))

print("Computing CRPS ...")
for lead in range(N_fcst):
    print("lead = {}".format(lead))

    # Raw and smoothed ensembles for this lead: keep the first EN members and
    # sample them at the (indx, indy) locations.
    with h5py.File(REFCST_dir + "{}_{}_lead{}.hdf".format(perfix_raw, year, lead), 'r') as h5io:
        RAW = h5io[key_raw][:, :EN, ...][..., indx, indy]
    with h5py.File(REFCST_dir + "{}_{}_lead{}.hdf".format(perfix_smooth, year, lead), 'r') as h5io:
        SMOOTH = h5io[key_smooth][:, :EN, ...][..., indx, indy]

    # Weighted blend of the two ensembles.
    AnEn = W_SL*RAW + (1-W_SL)*SMOOTH

    # Score the first `en` members for each requested ensemble size.
    for i, en in enumerate(EN_range):
        crps, mae, _ = metrics.CRPS_1d_nan(BCH_obs[:, lead, :], AnEn[:, :en, ...])
        MAE[:, lead, :, i] = mae
        CRPS[:, lead, :, i] = crps

tuple_save = (MAE, CRPS,)
label_save = ['MAE', 'CRPS',]
du.save_hdf5(tuple_save, label_save, save_dir, '{}_CRPS_vs_EN_{}.hdf'.format(perfix_smooth, year))
# Buffer for the raw ensemble. fcst_ref is written below as well and is
# expected to exist before this point.
fcst_raw = np.empty((N_days, N_fcst, EN, N_grids))

for lead in range(N_fcst):
    lead_file = REFCST_dir + "{}_{}_lead{}.hdf".format(prefix_raw, year_fcst, lead)
    with h5py.File(lead_file, 'r') as h5io:
        RAW = h5io[key_raw][:, :EN, ...]
    # Keep only the cells where land_mask_bc is False.
    RAW = RAW[..., ~land_mask_bc]
    fcst_ref[:, lead, :, :] = RAW
    fcst_raw[:, lead, :, :] = RAW

# Negative values are clipped to zero in both copies.
for ensemble in (fcst_ref, fcst_raw):
    ensemble[ensemble < 0] = 0

# ---------- Schaake shuffle ---------- #
print('SimSchaake starts ...')
start_time = time.time()
fcst_ss = sim_schaake(year_analog, fcst_ref, fcst_raw, APCP, ERA5, weights)
print("... Completed. Time = {} sec ".format((time.time() - start_time)))

# One output file per lead time.
for lead in range(N_fcst):
    tuple_save = (fcst_ss[:, lead, :, :], )
    label_save = [key_raw, ]
    du.save_hdf5(tuple_save, label_save, REFCST_dir,
                 '{}_SS_{}_lead{}.hdf'.format(prefix_raw, year_fcst, lead))
# Tail of the per-date prediction loop: each season branch (chosen by date.month) runs
# pu.feature_to_domain_pct with that season's four models (a, b, x, y); the results are stored
# at index n, then the collected arrays are appended to `data` and written by du.save_hdf5.
# NOTE(review): the opening `if` branch of the season chain lies outside this excerpt.
dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x1, ind=0) dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y1, ind=0) elif date.month in [3, 4, 5]: dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a2, ind=0) dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b2, ind=0) dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x2, ind=0) dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y2, ind=0) elif date.month in [6, 7, 8]: dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a3, ind=0) dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b3, ind=0) dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x3, ind=0) dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y3, ind=0) elif date.month in [9, 10, 11]: dscale_a = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_a4, ind=0) dscale_b = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_b4, ind=0) dscale_x = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_x4, ind=0) dscale_y = pu.feature_to_domain_pct(IN, CLIM, IN_ETOPO, land_mask, size, edge, gap, model_unet_y4, ind=0) RESULT_a[n, ...] = dscale_a RESULT_b[n, ...] = dscale_b RESULT_x[n, ...] = dscale_x RESULT_y[n, ...] = dscale_y # append data.append(REGRID_PCT) data.append(RESULT_a) data.append(RESULT_b) data.append(RESULT_x) data.append(RESULT_y) label += ['PCT_REGRID', 'UNET_A', 'UNET_B', 'XNET_A', 'XNET_B'] du.save_hdf5(tuple(data), label, out_dir=save_dir, filename='PRISM_PRED_NCEP_PCT_BC_2016_2018.hdf')
# Log-transform the regridded field and both climatology copies with du.log_trans, zero REGRID_P
# below `thres`, then bundle the fields with the elevation and grid arrays and write one HDF5 file.
# tuple(dict_save.values()) stays aligned with label_save because both are filled in the same order.
# NOTE(review): whether the first part sits inside a loop over VAR cannot be told from this excerpt.
REGRID_P = du.log_trans(REGRID_P) CLIM_4km_duplicate = du.log_trans(CLIM_4km_duplicate) CLIM_REGRID_duplicate = du.log_trans(CLIM_REGRID_duplicate) REGRID_P[REGRID_P<thres] = 0 # ------------------------ # # save data dict_save['{}_4km'.format(VAR)] = PRISM_P dict_save['{}_REGRID'.format(VAR)] = REGRID_P dict_save['{}_CLIM_4km'.format(VAR)] = CLIM_4km_duplicate dict_save['{}_CLIM_REGRID'.format(VAR)] = CLIM_REGRID_duplicate # collecting label label_save.append('{}_4km'.format(VAR)) label_save.append('{}_REGRID'.format(VAR)) label_save.append('{}_CLIM_4km'.format(VAR)) label_save.append('{}_CLIM_REGRID'.format(VAR)) # dictionary to tuple tuple_etopo = (etopo_4km, etopo_regrid) tuple_grids = (lon_025, lat_025, lon_4km, lat_4km, land_mask) # mark labels label_etopo = ['etopo_4km', 'etopo_regrid'] label_grids = ['lon_025', 'lat_025', 'lon_4km', 'lat_4km', 'land_mask'] # save hdf tuple_save = tuple(dict_save.values()) + tuple_etopo + tuple_grids label_all = label_save + label_etopo + label_grids du.save_hdf5(tuple_save, label_all, out_dir=PRISM_dir, filename='PRISM_{}_features_2015_2020.hdf'.format(VAR))
# =============== CRPS computation =============== #
# Output buffers; one [:, lead, ...] slab is filled per lead time.
score_shape = (N_days, N_fcst, N_grids)
MAE = np.empty(score_shape)
CRPS = np.empty(score_shape)

for lead in range(N_fcst):
    print("computing lead: {}".format(lead))

    # Read the whole 'gefs_qm' dataset for this lead, then sample it at (indx, indy).
    gefs_path = REFCST_dir + "GEFS_QM_{}_lead{}.hdf".format(year, lead)
    with h5py.File(gefs_path, 'r') as h5io:
        GEFS_stn = h5io['gefs_qm'][...][..., indx, indy]

    crps, mae, _ = metrics.CRPS_1d_nan(BCH_obs[:, lead, :], GEFS_stn)
    MAE[:, lead, ...] = mae
    CRPS[:, lead, ...] = crps

# save (all lead times, per year, GEFS only)
tuple_save = (MAE, CRPS)
label_save = ['MAE', 'CRPS']
du.save_hdf5(tuple_save, label_save, save_dir, 'GEFS_CRPS_BCH_{}.hdf'.format(year))
print('Lapse rate correction')

# 730 consecutive daily dates starting at 2018-01-01.
date_ref = 365 + 365
first_day = datetime(2018, 1, 1, 0)
date_list = [first_day + timedelta(days=x) for x in range(date_ref)]

# One fixed coefficient per calendar month (index 0 = January); scaled by 1e-3 when applied.
gamma_mon = [
    -4.4, -5.9, -7.1, -7.8, -8.1, -8.2,
    -8.1, -8.1, -7.7, -6.8, -5.5, -4.7,
]

out_shape = (date_ref, ) + lon_4km.shape
TMEAN_fix = np.zeros(out_shape)
TMEAN_correct = np.zeros(out_shape)

# Elevation difference between the 4-km and the regridded terrain.
delta_etopo = etopo_4km - etopo_regrid

for i, date in enumerate(date_list):
    mon_id = date.month - 1

    # (a) correction with the fixed monthly coefficient
    TMEAN_fix[i, ...] = TMEAN_REGRID[i] + gamma_mon[mon_id] * 1e-3 * delta_etopo

    # (b) correction with the interpolated jra_gamma field (halved, as in the original code)
    gamma_interp = 0.5 * du.interp2d_wraper(
        lon_025, lat_025, jra_gamma[i, ...], lon_4km, lat_4km, method=interp_method)
    TMEAN_correct[i, ...] = TMEAN_REGRID[i] + gamma_interp * delta_etopo

# Only the interpolated-gamma product is masked; TMEAN_fix is saved unmasked.
TMEAN_correct[:, land_mask] = np.nan

data_save = (lon_4km, lat_4km, TMEAN_correct, TMEAN_fix, land_mask)
label_save = ['lon_4km', 'lat_4km', 'TMEAN_correct', 'TMEAN_fix', 'land_mask']
du.save_hdf5(data_save, label_save, JRA_dir, 'JRA_TMEAN_correct_2018_2020.hdf')
# import 3d (time, lat, lon) features
# The context manager releases the file handle even if one of the reads raises.
with h5py.File(PRISM_dir + 'PRISM_{}_features_2015_2020.hdf'.format(var), 'r') as hdf_io:
    PRISM_T = hdf_io['{}_4km'.format(var)][ind_pred, ...]
    REGRID_T = hdf_io['{}_REGRID'.format(var)][ind_pred, ...]

# import pre-trained models (import together for saving time)
# UNET: one model per season, keyed by the season tag used in the file name.
unet = {}
for season in ('djf', 'mam', 'jja', 'son'):
    unet[season] = keras.models.load_model(
        model_import_dir + 'UNET3_{}_{}.hdf'.format(var, season))

# Calendar month -> season tag; every month 1..12 is covered.
month_to_season = {
    12: 'djf', 1: 'djf', 2: 'djf',
    3: 'mam', 4: 'mam', 5: 'mam',
    6: 'jja', 7: 'jja', 8: 'jja',
    9: 'son', 10: 'son', 11: 'son',
}

for n, date in enumerate(pred_list):
    X = (REGRID_T[n, ...], etopo_4km, etopo_regrid)
    print(date)
    temp_unet = vu.pred_domain(X, land_mask, unet[month_to_season[date.month]],
                               param, method='norm_std')
    RESULT_UNET[n, ...] = temp_unet

tuple_save = (lon_4km, lat_4km, PRISM_T, REGRID_T, RESULT_UNET)
label_save = ['lon_4km', 'lat_4km', '{}_4km'.format(var), '{}_REGRID'.format(var), 'RESULT_UNET']
du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename='PRISM_PRED_{}_test.hdf'.format(var))
# Collect [i, j] index pairs: IND for cells where bc_in_base is False, IND_base for cells where
# land_mask is False. IND itself is expected to be an existing list at this point.
# NOTE(review): the two tests are treated as independent (not nested); confirm against the
# original indentation.
IND_base = []
for i in range(grid_shape[0]):
    for j in range(grid_shape[1]):
        # `not` matches `~` for NumPy booleans and, unlike `~`, is also correct for plain
        # Python bools or 0/1 integers (where `~x` is always truthy).
        if not bc_in_base[i, j]:
            IND.append([i, j])
        if not land_mask[i, j]:
            IND_base.append([i, j])

# np.int was only an alias of the builtin int and was removed in NumPy 1.24.
IND = np.array(IND, dtype=int)
IND_base = np.array(IND_base, dtype=int)

# ---------------------------------- #
# the main loop
print("Main program starts ...")
start_time = time.time()
OUT = SL_search(
    IND,
    IND_base,
    era_3hq,
    Z,
    facet_h,
    facet_m,
    facet_l,
    W_facet,
)
print("{} secs for all locs".format(time.time() - start_time))

# ---------------------------------- #
# save (the SL_search output is stored under the label 'IND')
tuple_save = (OUT, )
label_save = ['IND']
du.save_hdf5(tuple_save, label_save, save_dir, 'S40_mon{}.hdf'.format(month))
# Flag, for the current `mon`, every (lead, init day) whose valid date falls in month_around.
# NOTE(review): `mon` and `month_around` are not defined in this excerpt; this first loop
# presumably sits inside an enclosing per-month loop, while everything from `MEAN = ...`
# onwards runs once after it — confirm the nesting when merging.
for lead in range(N_fcst):
    for d, date in enumerate(date_list):
        # ini date + lead time
        date_true = date + timedelta(hours=FCSTs[lead])
        if date_true.month-1 in month_around:
            flag_pick[mon, lead, d] = 1.0
        else:
            flag_pick[mon, lead, d] = 0.0

# Climatological mean per (month, lead, location).
MEAN = np.empty((12, N_fcst, N_grids))
for lead in range(N_fcst):
    MEAN[:, lead, :] = climo_mean(ERA5_obs[:, lead, :], flag_pick[:, lead, :])

# ---------- Duplicate to 2016-2020 ---------- #
N_days = 366 + 365*3
date_base = datetime(2016, 1, 1)
# Plain integer day offsets; the removed np.float alias is not needed to build timedeltas.
date_list = [date_base + timedelta(days=x) for x in range(N_days)]

MEAN_BCH = np.empty((N_days, N_fcst, N_grids))
for i, date in enumerate(date_list):
    # Index by the month of *this* date. Using the leftover `mon` from the flagging loop
    # would copy one and the same month into every day.
    mon_ = date.month-1
    MEAN_BCH[i, :, :] = MEAN[mon_, ...]

tuple_save = (MEAN_BCH,)
label_save = ['MEAN_BCH']
du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_climo-mean_ERA5.hdf')
param['size'] = 96

# loop over variables and seasons
VARS = ['TMEAN']
for VAR in VARS:
    # import 3d (time, lat, lon) features
    feature_file = JRA_dir + 'JRA_{}_features_2015_2020.hdf'.format(VAR)
    with h5py.File(feature_file, 'r') as hdf_io:
        REGRID_T = hdf_io['{}_REGRID'.format(VAR)][ind_pred, ...]

    shape_3d = REGRID_T.shape
    N_pred = shape_3d[0]
    RESULT_CLEAN = np.zeros(shape_3d)
    RESULT_025 = np.zeros((N_pred,)+lon_025.shape)

    for n in range(N_pred):
        print('\t{}'.format(n))
        X = (REGRID_T[n, ...], etopo_regrid)
        temp_unet = vu.pred_domain(X, land_mask, CGAN, param, method='norm_std')

        # Interpolate the prediction onto the (lon_025, lat_025) grid, then back onto the
        # (lon_4km, lat_4km) grid; both versions are kept.
        temp_025 = du.interp2d_wraper(lon_4km, lat_4km, temp_unet,
                                      lon_025, lat_025, method=interp_method)
        temp_4km = du.interp2d_wraper(lon_025, lat_025, temp_025,
                                      lon_4km, lat_4km, method=interp_method)
        RESULT_025[n, ...] = temp_025
        RESULT_CLEAN[n, ...] = temp_4km

    # Cells selected by land_mask carry no value in the 4-km product.
    RESULT_CLEAN[:, land_mask] = np.nan

    tuple_save = (lon_4km, lat_4km, RESULT_CLEAN, RESULT_025, etopo_4km, etopo_regrid)
    label_save = ['lon_4km', 'lat_4km',
                  '{}_REGRID'.format(VAR), '{}_025'.format(VAR),
                  'etopo_4km', 'etopo_regrid']
    du.save_hdf5(tuple_save, label_save, out_dir=JRA_dir,
                 filename='JRA_{}_clean_{}.hdf'.format(VAR, year))
# Tail of the CNN inference step: read the 'AnEn' dataset, clip negatives and apply log(x + 1),
# place it on the cells where land_mask_bc is False (zero elsewhere), run model.predict per
# sample and keep channel 0 of the last output, then undo the transform with exp(x) - 1,
# set cells where land_mask_bc is True to NaN, and save under the label 'cnn_pred'.
# NOTE(review): the loops that fill CLIM_elev and the loop defining `lead` start outside this excerpt.
CLIM_elev[i, en, ..., 1] = etopo_025 with h5py.File( REFCST_dir + "{}_final_dress_SS_{}_lead{}.hdf".format(TYPE, year, lead), 'r') as h5io: H15_SL = h5io['AnEn'][:N_sample, :EN, ...] # noisy AnEn preprocess H15_SL[H15_SL < 0] = 0 H15_SL = np.log(H15_SL + 1) RAW[..., ~land_mask_bc] = H15_SL RAW[..., land_mask_bc] = 0 for i in range(N_sample): X = np.concatenate((RAW[i, ..., None], CLIM_elev[i, ...]), axis=-1) temp_ = model.predict([X]) cgan_raw[i, ...] = temp_[-1][..., 0] cgan_raw[cgan_raw < 0] = 0 cgan_raw = np.exp(cgan_raw) - 1 # <--- de-normalized cgan_raw[..., land_mask_bc] = np.nan tuple_save = (cgan_raw, ) label_save = [ 'cnn_pred', ] du.save_hdf5(tuple_save, label_save, REFCST_dir, '{}_CNN_{}_lead{}.hdf'.format(TYPE, year, lead))
# Interpolate each day of jra_var from the (lon_025, lat_025) grid to the (lon_4km, lat_4km) grid,
# set cells selected by land_mask to NaN, subtract 273.15 for the temperature variables, and save
# the interpolated field together with TMEAN_4km, etopo_regrid and land_mask.
# NOTE(review): the leading `count += L_day` belongs to a loop that starts outside this excerpt.
count += L_day print('Interpolation') for i in range(days_ref): if i % 200 == 0: print('\tday index: {}'.format(i)) temp_interp = du.interp2d_wraper(lon_025, lat_025, jra_var[i, ...], lon_4km, lat_4km, method=interp_method) # land mask applied temp_interp[land_mask] = np.nan jra_interp[i, ...] = temp_interp print('Feature engineering') if VAR in ['TMAX', 'TMIN', 'TMEAN']: print('\tK to C') jra_var = jra_var - 273.15 jra_interp = jra_interp - 273.15 data_save = (lon_4km, lat_4km, jra_interp, TMEAN_4km, etopo_regrid, land_mask) label_save = [ 'lon_4km', 'lat_4km', '{}_REGRID'.format(VAR), 'TMEAN_4km', 'etopo_regrid', 'land_mask' ] du.save_hdf5(data_save, label_save, JRA_dir, 'JRA_{}_features_US_2015_2020.hdf'.format(VAR))
# Tail of the baseline fit: run vu.baseline_estimator on the training/validation inputs, store
# its three outputs (I, C, OUT) for index i, then save.
# Both branches of `if clim:` write the same file name; the `clim` branch additionally stores
# the *_with_clim arrays.
# NOTE(review): the enclosing loop over i starts outside this excerpt.
X_3d_valid = (T_REGRID[ind_valid, ...][ind_valid_temp, ...], T_CLIM_4km[ind_valid, ...][ind_valid_temp, ...]) I, C, OUT = vu.baseline_estimator(X_3d_train, X_2d, Y, X_3d_valid, X_2d, land_mask) OUT_with_clim[ind_valid_temp, ...] = OUT C_with_clim[..., N_with_clim * i:N_with_clim * (i + 1)] = C I_with_clim[..., i] = I if clim: # save as hdf tuple_save = (lon_4km, lat_4km, C_no_clim, I_no_clim, OUT_no_clim, C_with_clim, I_with_clim, OUT_with_clim) label_save = [ 'lon_4km', 'lat_4km', 'C_no_clim', 'I_no_clim', 'OUT_no_clim', 'C_with_clim', 'I_with_clim', 'OUT_with_clim' ] du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename='BASELINE_PRISM_{}_2018_2020.hdf'.format(var)) else: # save as hdf tuple_save = (lon_4km, lat_4km, C_no_clim, I_no_clim, OUT_no_clim) label_save = [ 'lon_4km', 'lat_4km', 'C_no_clim', 'I_no_clim', 'OUT_no_clim' ] du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename='BASELINE_PRISM_{}_2018_2020.hdf'.format(var))
# The SG-filter smoothing step is disabled; only the gridded, unsmoothed field is produced.
AnEn_grid = np.empty((L_fcst_days, EN) + bc_shape)

# Scatter every member's flattened values onto the grid cells where land_mask_bc is False.
valid_cells = ~land_mask_bc
for day in range(L_fcst_days):
    for member in range(EN):
        AnEn_grid[day, member, valid_cells] = AnEn[day, ..., member]

# The remaining cells were never written by the loop above; set them explicitly.
AnEn_grid[..., land_mask_bc] = np.nan

tuple_save = (AnEn_grid, )
label_save = ['AnEn', ]
du.save_hdf5(tuple_save, label_save, REFCST_dir,
             'BASE_final_{}_lead{}.hdf'.format(year_fcst, lead))
# Divide the monthly climatology fields by the month length (mon_days_366 or mon_days_365),
# log-transform them with du.log_trans, zero clim_regrid below `thres`, and save them with the
# grids, the domain subset of the NCEP arrays, elevation and land mask.
# NOTE(review): var_4km is stored under the label '{}_REGRID' — confirm that pairing is intended.
# NOTE(review): the `if` that precedes the visible `else:` lies outside this excerpt.
clim_4km[i, ...] = CLIM[var][mon_id, ...] / mon_days_366[mon_id] clim_regrid[i, ...] = CLIM_REGRID[var][mon_id, ...] / mon_days_366[mon_id] else: clim_4km[i, ...] = CLIM[var][mon_id, ...] / mon_days_365[mon_id] clim_regrid[i, ...] = CLIM_REGRID[var][mon_id, ...] / mon_days_365[mon_id] clim_4km = du.log_trans(clim_4km) clim_regrid = du.log_trans(clim_regrid) clim_regrid[clim_regrid < thres] = 0 # save data data_save = (lon_4km, lat_4km, lon_ncep[domain_ind[0]:domain_ind[1], domain_ind[2]:domain_ind[3]], lat_ncep[domain_ind[0]:domain_ind[1], domain_ind[2]:domain_ind[3]], var_4km, clim_4km, clim_regrid, ncep_var[:, domain_ind[0]:domain_ind[1], domain_ind[2]:domain_ind[3]], etopo_4km, etopo_regrid, land_mask) label_save = [ 'lon_4km', 'lat_4km', 'lon_ncep', 'lat_ncep', '{}_REGRID'.format(var), '{}_CLIM_4km'.format(var), '{}_CLIM_REGRID'.format(var), '{}_originals'.format(var), 'etopo_4km', 'etopo_regrid', 'land_mask' ] du.save_hdf5(data_save, label_save, NCEP_dir, 'NCEP_{}_features_BC_2016_2020.hdf'.format(var))
# Tail of the per-date UNET prediction: the seasonal model is chosen by date.month, run through
# vu.pred_domain, stored at index n, and the predictions are saved next to the regridded input.
# NOTE(review): this excerpt begins in the middle of the first (djf) vu.pred_domain call.
unet['djf'], param, method='norm_std') elif date.month in [3, 4, 5]: temp_unet = vu.pred_domain(X, land_mask, unet['mam'], param, method='norm_std') elif date.month in [6, 7, 8]: temp_unet = vu.pred_domain(X, land_mask, unet['jja'], param, method='norm_std') elif date.month in [9, 10, 11]: temp_unet = vu.pred_domain(X, land_mask, unet['son'], param, method='norm_std') RESULT_UNET[n, ...] = temp_unet tuple_save = (lon_4km, lat_4km, REGRID_T, RESULT_UNET) label_save = ['lon_4km', 'lat_4km', '{}_REGRID'.format(var), 'RESULT_UNET'] du.save_hdf5(tuple_save, label_save, out_dir=save_dir, filename='ERA_PRED_{}_2018_2020.hdf'.format(var))
# Sum era_pct in groups of 8 steps per day and sample it at (indx, indy); read each station's
# 'PREC_HOUR_QC' series for 2016-01-01..2019-12-31; set values above 300 to NaN; pass each
# station column through bu.clean_no_response; save the paired arrays.
# NOTE(review): the HDFStore is reopened for every station key, and flag_pick is saved under the
# label 'stn_flag'.
# NOTE(review): this excerpt begins in the middle of an indexing expression.
bc_inds[2]:bc_inds[3]] era_pct_daily = np.sum(np.reshape(era_pct, (N_days, 8, 48, 112)), axis=1) ERA5 += (era_pct_daily[..., indx, indy], ) ERA5_obs = np.concatenate(ERA5, axis=0) # ========== BCH ========== # OBS = () for key in stn_code: with pd.HDFStore(BACKUP_dir + 'BCH_PREC_QC_1D_2016_2020.hdf', 'r') as hdf_io: pd_temp = hdf_io[key] pd_temp.index = pd_temp['datetime'] pd_temp = pd_temp['2016-01-01':'2019-12-31'] obs_ = pd_temp['PREC_HOUR_QC'].values OBS += (obs_[:, None], ) BCH_obs = np.concatenate(OBS, axis=-1) # ! <--- data cleaning BCH_obs[BCH_obs > 300] = np.nan for i in range(len(stn_code)): BCH_obs[:, i] = bu.clean_no_response(BCH_obs[:, i], tol_window=60 * 8) tuple_save = (ERA5_obs, BCH_obs, flag_pick) label_save = ['ERA5_obs', 'BCH_obs', 'stn_flag'] du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_ERA5_1D_pairs.hdf')
# Keep the entries selected by flag_nonan, then resample them with replacement N_boost times and
# store reliability_diagram and brier_score_loss for every draw. Afterwards count the forecasts
# per bin, using the per-bin mean of prob_pred (over the draws) plus 1.0 as histogram edges.
# NOTE(review): the enclosing loop over d and the definition of r lie outside this excerpt.
obs = obs[flag_nonan] fcst = fcst[flag_nonan] L = np.sum(flag_nonan) o_bar_ = np.mean(obs) o_bar[d] = o_bar_ for n in range(N_boost): ind_bagging = np.random.choice(L, size=L, replace=True) obs_ = obs[ind_bagging] fcst_ = fcst[ind_bagging] prob_true_, prob_pred_ = reliability_diagram( obs_, fcst_, hist_bins) brier_ = brier_score_loss(obs_, fcst_) prob_true[d, :, n] = prob_true_ prob_pred[d, :, n] = prob_pred_ brier[d, n] = brier_ hist_bins_ = np.mean(prob_pred[d, ...], axis=1) use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0])) use[d, :] = use_ tuple_save = (brier, prob_true, prob_pred, use, o_bar) label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar'] du.save_hdf5(tuple_save, label_save, save_dir, '{}_Calib_loc{}.hdf'.format(prefix_out, r))
# Grid description taken from the raster that was opened before this point.
data_mask = np.squeeze(temp_io.dataset_mask())
N_lon = temp_io.width
N_lat = temp_io.height
temp_io.close()

lon, lat = np.meshgrid(np.arange(bounds[0], bounds[0] + dx * N_lon, dx),
                       np.arange(bounds[1], bounds[1] + dy * N_lat, dy))

# loop over files
PRISM_PCT = np.empty((len(file_dir), N_lat, N_lon))
datenum = []
for i, temp_dir in enumerate(file_dir):
    # First '*.bil' match in the directory (raises IndexError if there is none).
    temp_file = glob(temp_dir + '*.bil')[0]

    # The context manager closes the raster even if read() raises.
    with rasterio.open(temp_file, 'r') as temp_io:
        temp_data = np.squeeze(temp_io.read())

    # vals: blank the cells the dataset mask marks as 0, then flip the rows
    temp_data[data_mask == 0] = np.nan
    temp_data = np.flipud(temp_data)
    PRISM_PCT[i, ...] = temp_data

    # datenum
    date_str = basename(temp_file)[-16:-8]  # get date string from filename
    datenum += du.dt_to_sec([datetime.strptime(date_str, '%Y%m%d')])

# save hdf5
tuple_save = (PRISM_PCT, np.array(datenum), lon, lat)
label_save = ['PRISM_{}'.format(var), 'datenum', 'lon', 'lat']
du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir,
             filename='PRISM_{}_2015_2020.hdf'.format(var))
# Collect the `_4km`, `_025` and `_REGRID` fields per VAR, append the elevation and grid arrays,
# and write everything to 'PRISM_regrid_clim.hdf'.
# The saved tuple and the label list are concatenated in the same group order so entries stay paired.
# NOTE(review): the loops over i and VAR start outside this excerpt.
prism_025[i, ...] = temp_025 prism_regrid[i, ...] = temp_regrid # collecting fields dict_4km[VAR] = prism_4km dict_025[VAR] = prism_025 dict_regrid[VAR] = prism_regrid # collecting label label_4km.append(VAR + '_4km') label_025.append(VAR + '_025') label_regrid.append(VAR + '_REGRID') # dictionary to tuple tuple_4km = tuple(dict_4km.values()) tuple_025 = tuple(dict_025.values()) tuple_regrid = tuple(dict_regrid.values()) tuple_etopo = (etopo_4km, etopo_025, etopo_regrid) tuple_grids = (lon_025, lat_025, lon_4km, lat_4km, land_mask) # mark labels label_etopo = ['etopo_4km', 'etopo_025', 'etopo_regrid'] label_grids = ['lon_025', 'lat_025', 'lon_4km', 'lat_4km', 'land_mask'] # save hdf tuple_save = tuple_4km + tuple_025 + tuple_regrid + tuple_etopo + tuple_grids label_save = label_4km + label_025 + label_regrid + label_etopo + label_grids du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir, filename='PRISM_regrid_clim.hdf')
# Read the masks and elevation arrays from hdf_io, force columns 155 onward, the trailing rows and
# the first 5 rows of the two *_025 masks to True, and save everything to 'land_mask_NA.hdf'.
# NOTE(review): the trailing-row cut-off differs between the two masks (152 vs 145) — confirm intended.
# NOTE(review): hdf_io is opened outside this excerpt.
land_mask_025 = hdf_io['land_mask_025'][...] land_mask_terrain_025 = hdf_io['land_mask_terrain_025'][...] land_mask = hdf_io['land_mask'][...] land_mask_terrain_4km = hdf_io['land_mask_terrain'][...] etopo_4km = hdf_io['etopo_4km'][...] etopo_regrid = hdf_io['etopo_regrid'][...] land_mask_025[:, 155:] = True land_mask_025[152:, :] = True land_mask_025[:5, :] = True land_mask_terrain_025[:, 155:] = True land_mask_terrain_025[145:, :] = True land_mask_terrain_025[:5, :] = True # save tuple_save = (lon_4km, lat_4km, lon_025, lat_025, etopo_4km, etopo_regrid, land_mask, land_mask_025, land_mask_terrain_4km, land_mask_terrain_025) label_save = [ 'lon_4km', 'lat_4km', 'lon_025', 'lat_025', 'etopo_4km', 'etopo_regrid', 'land_mask', 'land_mask_025', 'land_mask_terrain', 'land_mask_terrain_025' ] du.save_hdf5(tuple_save, label_save, out_dir=PRISM_dir, filename='land_mask_NA.hdf')
# For every day and grid cell: derive level pressures as A + B * era_sfp, convert them to heights
# with metpy (scaled by 1000), and fit linear_slope of temperature against height over the level
# range that np.searchsorted finds for first-level height + 1500 and + 2500.
# NOTE(review): `sorter` is defined but not used in the visible code, and np.searchsorted assumes
# temp_height is sorted ascending — confirm the level ordering.
# NOTE(review): the loops over i and j around the leading `with` start outside this excerpt.
with nc.Dataset(filenames[VAR][file_num], 'r') as nc_io: temp_var[j, ...] = np.squeeze(nc_io.variables[nc_keys[VAR]][...]) era_sfp[i, ...] = f(temp_var, axis=0) print('Lapse rate with linear regression') sorter = np.array( [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) for i in range(days_ref): print(i) for j in range(grid_shape[0]): for k in range(grid_shape[1]): temp_lev = np.array(A + B * era_sfp[i, j, k]) temp_height = 1000 * metpy.calc.pressure_to_height_std( temp_lev * units.Pa).__array__() temp_temp = era_var[i, :, j, k] era_height[i, :, j, k] = temp_height era_lev[i, :, j, k] = temp_lev ind1 = np.searchsorted(temp_height, temp_height[0] + 1500) ind2 = np.searchsorted(temp_height, temp_height[0] + 2500) era_gamma[i, j, k] = linear_slope(temp_height[ind1:ind2], temp_temp[ind1:ind2]) data_save = (lon_025, lat_025, era_var, era_sfp, era_gamma, era_height, era_lev) label_save = [ 'lon_025', 'lat_025', 'era_var', 'era_sfp', 'era_gamma', 'era_height', 'era_lev' ] du.save_hdf5(data_save, label_save, ERA_dir, 'ERA_TMEAN_GAMMA_2018_2020.hdf')
# Keep the entries selected by flag_nonan, then resample them with replacement N_boost times and
# store reliability_diagram and brier_score_loss for every draw. Afterwards count the forecasts
# per bin, using the per-bin mean of prob_pred (over the draws) plus 1.0 as histogram edges.
# NOTE(review): the enclosing loop over d and the definition of r lie outside this excerpt.
obs = obs[flag_nonan] fcst = fcst[flag_nonan] L = np.sum(flag_nonan) o_bar_ = np.mean(obs) o_bar[d] = o_bar_ for n in range(N_boost): ind_bagging = np.random.choice(L, size=L, replace=True) obs_ = obs[ind_bagging] fcst_ = fcst[ind_bagging] prob_true_, prob_pred_ = reliability_diagram( obs_, fcst_, hist_bins) brier_ = brier_score_loss(obs_, fcst_) prob_true[d, :, n] = prob_true_ prob_pred[d, :, n] = prob_pred_ brier[d, n] = brier_ hist_bins_ = np.mean(prob_pred[d, ...], axis=1) use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0])) use[d, :] = use_ tuple_save = (brier, prob_true, prob_pred, use, o_bar) label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar'] du.save_hdf5(tuple_save, label_save, save_dir, 'GEFS_99th_Calib_loc{}.hdf'.format(r))
# For each year 2000-2019 and each initialization day, copy the PCT_history field valid at
# init date + lead hours into ERA_fcst, zero values below 1e-5, and save one file per year.
# NOTE(review): a target date that falls in the next calendar year is looked up in
# PCT_history[str(year + 1)], so that year must have been loaded as well.
# NOTE(review): the loop that fills PCT_history starts outside this excerpt.
with h5py.File(ERA_dir + 'PT_3hr_{}.hdf'.format(year), 'r') as h5io: era_pct = h5io['era_025'][...] PCT_history['{}'.format(year)] = era_pct for year in range(2000, 2020): print("Processing year: {}".format(year)) N_days = (datetime(year + 1, 1, 1) - datetime(year, 1, 1)).days ERA_fcst = np.zeros((N_days, N_fcst, 160, 220)) for day in range(N_days): for t, fcst_temp in enumerate(FCSTs): # fcsted (targeted) date date_true = datetime( year, 1, 1) + timedelta(days=day) + timedelta(hours=fcst_temp) # handling cross years year_true = int(date_true.year) ind_true = int( (date_true - datetime(year_true, 1, 1)).total_seconds() / 60 / 60 / freq) # ERA_fcst[day, t, ...] = PCT_history['{}'.format(year_true)][ind_true, ...] ERA_fcst[ERA_fcst < 1e-5] = 0.0 tuple_save = (ERA_fcst, ) label_save = ['era_fcst'] filename = 'ERA5_GEFS-fcst_{}.hdf'.format(year) du.save_hdf5(tuple_save, label_save, ERA_dir, filename)
# Mean of the binary observations.
o_bar = np.mean(BCH_binary)

# Per-bin mean of the predicted values (averaged over axis 1), one vector per model.
hist_bins_base = np.mean(prob_pred_base, axis=1)
hist_bins_bcnn = np.mean(prob_pred_bcnn, axis=1)
hist_bins_sl = np.mean(prob_pred_sl, axis=1)
hist_bins_scnn = np.mean(prob_pred_scnn, axis=1)

# Histogram edges: the per-bin means, closed on the right by 1.0.
edges_base = np.array(list(hist_bins_base) + [1.0])
edges_bcnn = np.array(list(hist_bins_bcnn) + [1.0])
edges_sl = np.array(list(hist_bins_sl) + [1.0])
edges_scnn = np.array(list(hist_bins_scnn) + [1.0])

# Number of forecasts falling into each bin.
use_base = np.histogram(BASE_prob, bins=edges_base)[0]
use_bcnn = np.histogram(BCNN_prob, bins=edges_bcnn)[0]
use_sl = np.histogram(SL_prob, bins=edges_sl)[0]
use_scnn = np.histogram(SCNN_prob, bins=edges_scnn)[0]

tuple_save = (
    prob_true_base, prob_true_bcnn, prob_true_sl, prob_true_scnn,
    prob_pred_base, prob_pred_bcnn, prob_pred_sl, prob_pred_scnn,
    brier_base, brier_bcnn, brier_sl, brier_scnn,
    use_base, use_bcnn, use_sl, use_scnn,
    o_bar,
)
label_save = [
    'prob_true_base', 'prob_true_bcnn', 'prob_true_sl', 'prob_true_scnn',
    'prob_pred_base', 'prob_pred_bcnn', 'prob_pred_sl', 'prob_pred_scnn',
    'brier_base', 'brier_bcnn', 'brier_sl', 'brier_scnn',
    'use_base', 'use_bcnn', 'use_sl', 'use_scnn',
    'o_bar',
]
du.save_hdf5(tuple_save, label_save, save_dir,
             'Accum_Calib_lead{}_loc{}.hdf'.format(lead_, r))
# Rows of the climatology selected by flag_pick.
MEAN = MEAN_BCH[flag_pick, ...]

# =============== Allocation and MAE computation =============== #
# N_days (years divisible by 4 are treated as leap years)
N_days = 366 if year % 4 == 0 else 365

# other params
N_fcst = 54
EN = 75
N_grids = BCH_obs.shape[-1]

MAE = np.empty((N_days, N_fcst, N_grids))

print("Computing MAE ...")
for lead in range(N_fcst):
    print("lead = {}".format(lead))
    # Absolute error of the climatological mean against the observations at this lead.
    abs_err = np.abs(BCH_obs[:, lead, :] - MEAN[:, lead, :])
    MAE[:, lead, ...] = abs_err

tuple_save = (MAE, )
label_save = ['MAE', ]
du.save_hdf5(tuple_save, label_save, save_dir, 'CLIM_MAE_BCH_{}.hdf'.format(year))
# loop over variables and seasons
VARS = ['TMEAN']
for VAR in VARS:
    # import 3d (time, lat, lon) features
    feature_file = JRA_dir + 'JRA_{}_features_US_2015_2020.hdf'.format(VAR)
    with h5py.File(feature_file, 'r') as hdf_io:
        PRISM_T = hdf_io['{}_4km'.format(VAR)][...]
        REGRID_T = hdf_io['{}_REGRID'.format(VAR)][...]

    shape_3d = REGRID_T.shape
    N_pred = shape_3d[0]
    RESULT_CLEAN = np.zeros(shape_3d)
    RESULT_025 = np.zeros((N_pred,)+lon_025.shape)

    for n in range(N_pred):
        print('\t{}'.format(n))
        X = (REGRID_T[n, ...], etopo_regrid)
        temp_unet = vu.pred_domain(X, land_mask, CGAN, param, method='norm_std')

        # Interpolate the prediction onto the (lon_025, lat_025) grid, then back onto the
        # (lon_4km, lat_4km) grid; both versions are kept.
        temp_025 = du.interp2d_wraper(lon_4km, lat_4km, temp_unet,
                                      lon_025, lat_025, method=interp_method)
        temp_4km = du.interp2d_wraper(lon_025, lat_025, temp_025,
                                      lon_4km, lat_4km, method=interp_method)
        RESULT_025[n, ...] = temp_025
        RESULT_CLEAN[n, ...] = temp_4km

    # Cells selected by land_mask carry no value in the 4-km product.
    RESULT_CLEAN[:, land_mask] = np.nan

    tuple_save = (lon_4km, lat_4km, PRISM_T, RESULT_CLEAN, RESULT_025, etopo_4km, etopo_regrid)
    label_save = ['lon_4km', 'lat_4km',
                  '{}_4km'.format(VAR), '{}_REGRID'.format(VAR), '{}_025'.format(VAR),
                  'etopo_4km', 'etopo_regrid']
    du.save_hdf5(tuple_save, label_save, out_dir=JRA_dir,
                 filename='JRA_US_{}_clean_2015_2020.hdf'.format(VAR))
# ========== Region subsets ========== #
# Bounding boxes handed to du.check_bounds as its third argument.
south = [-130, -121, 48.75, 50.25]
north = [-127.5, -110, 53, 60]


def _region_code(lon, lat):
    """Return 1 if the point is inside `south`, 3 if inside `north`, otherwise 2."""
    if du.check_bounds(lon, lat, south):
        return 1
    if du.check_bounds(lon, lat, north):
        return 3
    return 2


# 0 van isl, 1 south, 2 rocky, 3 north (code 0 is never assigned here)
loc_id = np.array([_region_code(stn_lon[i], stn_lat[i]) for i in range(len(stn_code))])

flag_sw = loc_id == 1
flag_si = loc_id == 2
flag_n = loc_id == 3

# ========== Save ========== #
tuple_save = (stn_lon, stn_lat, flag_sw, flag_si, flag_n)
label_save = ['stn_lon', 'stn_lat', 'flag_sw', 'flag_si', 'flag_n']
du.save_hdf5(tuple_save, label_save, save_dir, 'BCH_wshed_groups.hdf')
# Tail of the per-lead Brier-score step: sample the forecast at (indx, indy), apply
# ana.cnn_precip_fix, then for each month value in flag_pick compare observations and forecasts
# against the station-wise BCH_90th threshold (">=") and store metrics.BS_binary_1d_nan.
# NOTE(review): obs_flag is a boolean array, so `obs_flag[np.isnan(obs_)] = np.nan` stores True
# rather than a missing value; cast obs_flag to float first if NaN observations must stay missing.
# NOTE(review): the `if` that precedes the visible `else:` and the loop over `lead` lie outside
# this excerpt.
# id 0 and 1 are flattened grid points, reshape them to 2d. AnEn_full[..., ~land_mask_bc] = AnEn_ AnEn_stn = AnEn_full[..., indx, indy] else: AnEn_stn = AnEn_[..., indx, indy] # cnn outputs can be negative, fix it here. AnEn_stn = ana.cnn_precip_fix(AnEn_stn) # extracting the 90-th threshold for initializaiton time + lead time for mon in range(12): flag_ = flag_pick[:, lead] == mon # stn obs obs_ = BCH_obs[flag_, lead, :] # fcst pred_ = AnEn_stn[flag_, ...] # station-wise threshold thres_ = BCH_90th[mon, :] # Brier Score ( ">=" is applied) obs_flag = obs_ >= thres_ obs_flag[np.isnan(obs_)] = np.nan pred_flag = pred_ >= thres_ BS[flag_, lead, :] = metrics.BS_binary_1d_nan(obs_flag, pred_flag) # save (all lead times, per year, per experiment) tuple_save = (BS, BCH_90th) label_save = ['BS', 'stn_90th'] du.save_hdf5(tuple_save, label_save, save_dir, '{}_BS_BCH_{}.hdf'.format(prefix_out, year))
# Three terrain versions, each zeroed where land_mask is True:
# Z_l (prepared earlier), Z_m (Gaussian-filtered) and Z_h (unfiltered copy).
Z_l[land_mask] = 0

Z_m = gaussian_filter(etopo_025, 5 / np.pi)
Z_m[land_mask] = 0

Z_h = np.copy(etopo_025)
Z_h[land_mask] = 0

# Facets of each terrain version, blanked where land_mask is True.
facet_h, facet_m, facet_l = [facet(Z) for Z in (Z_h, Z_m, Z_l)]
for facet_field in (facet_h, facet_m, facet_l):
    facet_field[land_mask] = np.nan

# Windowed terrain standard deviation, scaled by its maximum (NaNs ignored).
sigma_facet = window_stdev_slow(etopo_025, radius=5)
W_facet = sigma_facet / np.nanmax(sigma_facet)

# Linear function of sigma025 limited to [0.2, 0.8]; blanked where land_mask is True.
W_025 = 0.8 * (sigma025 - 15)
upper, lower = 0.8, 0.2
W_025[W_025 > upper] = upper
W_025[W_025 < lower] = lower
W_025[land_mask] = np.nan

# W_025 is stored under the label 'W_SL'.
tuple_save = (facet_h, facet_m, facet_l, W_facet, W_025)
label_save = ['facet_h', 'facet_m', 'facet_l', 'W_facet', 'W_SL']
du.save_hdf5(tuple_save, label_save, save_dir, 'NA_SL_info.hdf')