def taylor_count(path_in, file_ref, varname_ref, file_vr, varname_vr,
                 file_rcm, varname_rcm):
    """Compute normalized Taylor-diagram statistics for the VR and RCM runs.

    For each model, the time-mean field of every month from April to August
    and the time mean over the whole record ("all") are compared with the
    corresponding time mean of the reference data using
    ``skill_metrics.taylor_statistics``. Standard deviations and centered
    RMS differences are divided by the reference standard deviation.

    Args:
        path_in: Directory that contains all input files.
        file_ref: File name of the reference dataset. Its time dimension
            must be named ``time``.
        varname_ref: Name of the variable to read from the reference file.
        file_vr: File name of the VR dataset. Its time dimension must be
            named ``Time``.
        varname_vr: Name of the variable to read from the VR file.
        file_rcm: File name of the RCM dataset (time dimension ``Time``).
        varname_rcm: Name of the variable to read from the RCM file.

    Returns:
        dict: ``{"vr": {...}, "rcm": {...}}``. Each inner dict maps "sdev",
        "crmsd" and "ccoef" to a numpy array of length 7: the reference
        point first, then months 4, 5, 6, 7, 8 and finally "all".

    Raises:
        ValueError: If a model field and the reference field do not contain
            the same number of grid points.
    """
    import numpy as np
    import skill_metrics as sm
    import xarray as xr

    months = ["4", "5", "6", "7", "8"]
    labels = months + ["all"]
    # Each model is read with its own variable name.
    model_sources = {
        "vr": (file_vr, varname_vr),
        "rcm": (file_rcm, varname_rcm),
    }

    def _period_means(dataset, varname, time_dim):
        """Return ``{label: ndarray}`` of monthly and whole-record time means."""
        field = dataset[varname]
        month_of_step = dataset[time_dim].dt.month
        means = {
            month: field.loc[month_of_step == int(month)].mean(
                dim=[time_dim]).values
            for month in months
        }
        means["all"] = field.mean(dim=[time_dim]).values
        return means

    def _paired_valid(model_field, obs_field):
        """Flatten both fields and keep only points valid in both of them."""
        model_flat = model_field.ravel()
        obs_flat = obs_field.ravel()
        if model_flat.size != obs_flat.size:
            raise ValueError(
                "model and reference fields differ in size: %d vs %d"
                % (model_flat.size, obs_flat.size))
        # One shared mask keeps the samples paired; filtering each array on
        # its own NaNs would shift points against each other.
        keep = ~(np.isnan(model_flat) | np.isnan(obs_flat))
        return model_flat[keep], obs_flat[keep]

    # The reference means do not depend on the model, so compute them once.
    with xr.open_dataset(path_in + "/" + file_ref) as ds_obs:
        obs_means = _period_means(ds_obs, varname_ref, "time")

    model_plot = {}
    for model_type, (file_name, varname) in model_sources.items():
        with xr.open_dataset(path_in + "/" + file_name) as ds_model:
            model_means = _period_means(ds_model, varname, "Time")

        stats = {}
        for label in labels:
            predicted, reference = _paired_valid(model_means[label],
                                                 obs_means[label])
            stats[label] = sm.taylor_statistics(predicted, reference)

        # Index 0 of every statistic belongs to the reference series, index 1
        # to the model. The reference point is taken from the "all" entry.
        ref_stats = stats["all"]
        sdev = [ref_stats['sdev'][0] / ref_stats['sdev'][0]]
        crmsd = [ref_stats['crmsd'][0] / ref_stats['sdev'][0]]
        ccoef = [ref_stats['ccoef'][0]]

        for label in labels:
            entry = stats[label]
            ref_sdev = entry['sdev'][0]
            sdev.append(entry['sdev'][1] / ref_sdev)
            crmsd.append(entry['crmsd'][1] / ref_sdev)
            ccoef.append(entry['ccoef'][1])

        model_plot[model_type] = {
            "sdev": np.array(sdev),
            "crmsd": np.array(crmsd),
            "ccoef": np.array(ccoef),
        }

    return model_plot
self.ref = ref if __name__ == '__main__': # Close any previously open graphics windows # ToDo: fails to work within Eclipse plt.close('all') # Read data from pickle file data = load_obj('taylor_data') # Calculate statistics for Taylor diagram # The first array element corresponds to the reference series # for the while the second is that for the predicted series. taylor_stats1 = sm.taylor_statistics(data.pred1, data.ref, 'data') taylor_stats2 = sm.taylor_statistics(data.pred2, data.ref, 'data') taylor_stats3 = sm.taylor_statistics(data.pred3, data.ref, 'data') # Store statistics in arrays sdev = np.array([ taylor_stats1['sdev'][0], taylor_stats1['sdev'][1], taylor_stats2['sdev'][1], taylor_stats3['sdev'][1] ]) crmsd = np.array([ taylor_stats1['crmsd'][0], taylor_stats1['crmsd'][1], taylor_stats2['crmsd'][1], taylor_stats3['crmsd'][1] ]) ccoef = np.array([ taylor_stats1['ccoef'][0], taylor_stats1['ccoef'][1], taylor_stats2['ccoef'][1], taylor_stats3['ccoef'][1]
#!/usr/bin/python
import matplotlib.pyplot as plt
import numpy as np
import pickle
import skill_metrics as sm
from sys import version_info

# Reference series and two candidate series, 18 samples each.
dataobs = [0] * 7 + [10, 44, 9] + [0] * 8
datamodel = [0] * 6 + [2.2, 9.5, 0.4] + [0] * 9
datamodel2 = [0] * 14 + [0.1, 0.1] + [0] * 2

test = sm.taylor_statistics(datamodel, dataobs, 'data')
test2 = sm.taylor_statistics(datamodel2, dataobs, 'data')

# For every statistic: element 0 of the first result, followed by
# element 1 of each result.
sdev, crmsd, ccoef = (
    np.array([test[key][0], test[key][1], test2[key][1]])
    for key in ('sdev', 'crmsd', 'ccoef')
)

sm.taylor_diagram(sdev, crmsd, ccoef)

# Show plot
plt.show()
np.savetxt(path + 'table_regularization.csv', table, delimiter=",", fmt='%s') # ------------------------------------------------------------------------------ # -------------------------------- PLOT ---------------------------------------- # ------------------------------------------------------------------------------ # Set the figure properties (optional) rcParams["figure.figsize"] = [10.0, 8] rcParams['lines.linewidth'] = 1.25 # line width for plots rcParams.update({'font.size': 13}) # font size of axes text # Close any previously open graphics windows # ToDo: fails to work within Eclipse plt.close('all') taylor_stats1 = sm.taylor_statistics(MWF_nnls, True_fM) taylor_stats2 = sm.taylor_statistics(MWF_X2_I, True_fM) taylor_stats3 = sm.taylor_statistics(MWF_X2_L1, True_fM) taylor_stats4 = sm.taylor_statistics(MWF_X2_L2, True_fM) taylor_stats5 = sm.taylor_statistics(MWF_Lcurve_I, True_fM) taylor_stats6 = sm.taylor_statistics(MWF_Lcurve_L1, True_fM) taylor_stats7 = sm.taylor_statistics(MWF_Lcurve_L2, True_fM) taylor_stats8 = sm.taylor_statistics(MWF_GCV_I, True_fM) taylor_stats9 = sm.taylor_statistics(MWF_GCV_L1, True_fM) taylor_stats10 = sm.taylor_statistics(MWF_GCV_L2, True_fM) target_stats1 = sm.target_statistics(MWF_nnls, True_fM) target_stats2 = sm.target_statistics(MWF_X2_I, True_fM) target_stats3 = sm.target_statistics(MWF_X2_L1, True_fM) target_stats4 = sm.target_statistics(MWF_X2_L2, True_fM) target_stats5 = sm.target_statistics(MWF_Lcurve_I, True_fM)
labels = ['Non-Dimensional Observation', 'POI', 'NB', 'ZIP', "ZINB", "RF"] pred_poi = m[:, 0] pred_nb = m[:, 1] pred_zip = m[:, 2] pred_zinb = m[:, 3] pred_rf = m[:, 4] target_mod_poi = sm.target_statistics(pred_poi, t, 'data') target_mod_nb = sm.target_statistics(pred_nb, t, 'data') target_mod_zip = sm.target_statistics(pred_zip, t, 'data') target_mod_zinb = sm.target_statistics(pred_zinb, t, 'data') target_mod_rf = sm.target_statistics(pred_rf, t, 'data') taylor_mod_poi = sm.taylor_statistics(pred_poi, t, 'data') taylor_mod_nb = sm.taylor_statistics(pred_nb, t, 'data') taylor_mod_zip = sm.taylor_statistics(pred_zip, t, 'data') taylor_mod_zinb = sm.taylor_statistics(pred_zinb, t, 'data') taylor_mod_rf = sm.taylor_statistics(pred_rf, t, 'data') target_bias = np.array([ target_mod_poi['bias'], target_mod_nb['bias'], target_mod_zip['bias'], target_mod_zinb["bias"], target_mod_rf["bias"] ]) target_crmsd = np.array([ target_mod_poi['crmsd'], target_mod_nb['crmsd'], target_mod_zip['crmsd'], target_mod_zinb["crmsd"], target_mod_rf["crmsd"] ]) target_rmsd = np.array([ target_mod_poi['rmsd'], target_mod_nb['rmsd'], target_mod_zip['rmsd'],
def main():
    """Draw a Taylor diagram of LAI products against the BNU reference.

    Reads the BNU LAI file as reference, every ``*nc4`` file in the BG1
    directory, two multi-model mean files and the LAI3G product. All series
    are restricted to the Kenya and Tanzania index windows, flattened and
    concatenated. Points where the reference is not positive are dropped
    before the statistics are computed. The diagram is saved to
    ``taylor_BG1_BNU_LAI3G.png``.

    Model time steps are selected by their position in the full time axis
    (dates later than 2000-01-01), so the steps after the start date are
    read rather than the same number of steps from the beginning of the file.
    """
    # NOTE: `slice` below is the file's own helper (it is called with four
    # arguments), not the builtin; `np.s_` is used wherever a real slice
    # object is needed.

    def _regional_values(variable, time_index):
        """Return the flattened Kenya and Tanzania windows of `variable`."""
        return np.concatenate([
            variable[time_index, latli_k:latui_k, lonli_k:lonui_k].flatten(),
            variable[time_index, latli_t:latui_t, lonli_t:lonui_t].flatten(),
        ])

    # import actual observation dataframe (BNU product)
    with netCDF4.Dataset('BNU_2000-2010.nc4') as f:
        latt = f.variables['latitude'][:]
        lonn = f.variables['longitude'][:]
        # slice out Kenya
        latli_k, latui_k, lonli_k, lonui_k = slice([-4.75, 4.75],
                                                   [33.25, 41.75], latt, lonn)
        # slice out Tanzania
        latli_t, latui_t, lonli_t, lonui_t = slice([-11.75, -1.25],
                                                   [28.25, 40.75], latt, lonn)
        # observation slice by concatenating Kenya & Tanzania
        fSubset = _regional_values(f.variables['LAI'], np.s_[:])

    # import all BG1 model dataframe, subset to 2000-2010
    model_dict = {}
    mypath = '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1'
    pathlist = [
        entry for entry in listdir(mypath)
        if isfile(join(mypath, entry)) and entry.endswith('nc4')
    ]
    start_date = datetime.datetime(2000, 1, 1)
    for file in pathlist:
        print(file)
        name = file.split('_')[0] + file.split('_')[-1]  # tidy up the name
        with netCDF4.Dataset(mypath + str('/') + file) as f_model:
            f_model.set_auto_mask(False)  # missing value = -9999.0
            time = f_model.variables['time']
            dates = num2date(time[:], time.units)
            # Positions in the full time axis that lie after the start date.
            in_period = np.where(dates > start_date)[0]
            f_model_sub = _regional_values(f_model.variables['LAI'],
                                           in_period)
        # set missing value to 0
        f_model_sub[f_model_sub == -9999.0] = 0
        model_dict[name] = f_model_sub

    # add multi-model mean slice to the model dictionary
    model_dict['BG1_mean'] = multi_model_mean_slice(
        '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1/BG1_mean.txt')
    model_dict['SG3_mean'] = multi_model_mean_slice(
        '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1/SG3_mean.txt')

    # add LAI3G (another actual observation product) for comparison
    with netCDF4.Dataset(
            '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/Taylor Diagram/LAI3G_regrid.nc'
    ) as LAI3G:
        # subset 2000-2010 (-132), Kenya & Tanzania; divide by 1000 to rescale
        model_dict['LAI3G'] = _regional_values(LAI3G.variables['LAI'],
                                               np.s_[-132:]) / 1000

    # Make the Taylor Diagram
    # Set the figure properties (optional)
    rcParams["figure.figsize"] = [8.0, 6.4]
    rcParams['lines.linewidth'] = 1  # line width for plots
    rcParams.update({'font.size': 8})  # font size of axes text

    # Calculate statistics for Taylor diagram.
    # Element 0 of each statistic belongs to the reference series, element 1
    # to the predicted series. Zeros mark ocean grid cells in the reference
    # and are excluded from both series.
    valid = fSubset > 0
    reference = fSubset[valid]
    sdev, crmsd, ccoef = [], [], []
    for model_values in model_dict.values():
        taylor_stats = sm.taylor_statistics(model_values[valid], reference)
        if not sdev:
            # Seed the arrays once with the reference entry.
            sdev.append(taylor_stats['sdev'][0])
            crmsd.append(taylor_stats['crmsd'][0])
            ccoef.append(taylor_stats['ccoef'][0])
        sdev.append(taylor_stats['sdev'][1])
        crmsd.append(taylor_stats['crmsd'][1])
        ccoef.append(taylor_stats['ccoef'][1])
    sdev, crmsd, ccoef = np.array(sdev), np.array(crmsd), np.array(ccoef)

    # get labels
    label = list(model_dict.keys())
    label.insert(0, 'obs')

    # sort by correlation ccoef, highest first
    result = sorted(zip(label, sdev, crmsd, ccoef),
                    key=lambda x: x[3],
                    reverse=True)
    # unzip the result with sorted order
    label, sdev, crmsd, ccoef = zip(*result)
    sdev, crmsd, ccoef = np.array(sdev), np.array(crmsd), np.array(ccoef)

    # print out the result
    print(label, ccoef)

    # plot Taylor Diagram
    sm.taylor_diagram(sdev,
                      crmsd,
                      ccoef,
                      markerLabel=list(label),
                      markerLabelColor='r',
                      markerLegend='on',
                      markerColor='r',
                      styleOBS='-',
                      colOBS='r',
                      markerobs='o',
                      markerSize=6,
                      tickRMS=[0.0, 1.0, 2.0, 3.0],
                      tickRMSangle=115,
                      showlabelsRMS='on',
                      titleRMS='on',
                      titleOBS='Ref',
                      checkstats='on')
    plt.savefig('taylor_BG1_BNU_LAI3G.png', dpi=300)
transform=ax.transAxes, fontsize=14, verticalalignment='top', bbox=props) plt.show() fig4.savefig(output_path + os.sep + 'Comp_OMET_26.5N_RAPID_hindcast_time_series.jpg', dpi=400) print '*******************************************************************' print '********************** Taylor Diagram ***********************' print '*******************************************************************' # statistical operation inside skill_metrics function # the time series must have the same size # RAPID array observation is taken as reference taylor_stats1 = sm.taylor_statistics(OMET_RAPID_monthly, OMET_RAPID_monthly) taylor_stats2 = sm.taylor_statistics(OMET_ORAS4_RAPID_series[3:], OMET_RAPID_monthly[:-10]) taylor_stats3 = sm.taylor_statistics(OMET_GLORYS2V3_RAPID_series[3:], OMET_RAPID_monthly[:-10]) taylor_stats4 = sm.taylor_statistics(OMET_SODA3_RAPID_series[3:-2], OMET_RAPID_monthly) taylor_stats5 = sm.taylor_statistics(OMET_hindcast_series[46 * 12 + 3:], OMET_RAPID_monthly[:-34]) # Store statistics in arrays # Specify labels for points in a cell array (M1 for model prediction 1, # etc.). Note that a label needs to be specified for the reference even # though it is not used. label = [ 'RAPID ARRAY Obs', 'RAPID ARRAY', 'ORAS4', 'GLORYS2V3', 'SODA3', 'NEMO ORCA'
def make_model_evaluation(df_nonnan, model_path, ls_pred_dt, cfg_tds, cfg_op):
    """Evaluate the fitted XGBoost TRT Rank models for every lead time.

    For each lead time the test set is built, the lead-time specific model is
    applied, observed vs. predicted scatter plots are drawn and one set of
    markers is added to a shared Taylor diagram. Afterwards the observed and
    predicted ranks are pickled for the operational probability matching,
    skill scores are plotted against lead time and time series are plotted
    for a hardcoded list of case-study TRT cells.

    Args:
        df_nonnan: Input passed to ``ipt.get_model_input`` and to the time
            series plotting helpers.
        model_path: Directory containing the pickled models
            (``model_dict_t0diff_maxdepth6_selfeat_gain.pkl`` and one
            ``model_<lead time>_t0diff_maxdepth6.pkl`` per lead time).
        ls_pred_dt: Sequence of integer lead times (labelled as minutes in
            the plots).
        cfg_tds: Configuration mapping; ``cfg_tds["fig_output_path"]`` is the
            directory the figures are written to.
        cfg_op: Configuration mapping; ``cfg_op["XGB_model_path"]`` is the
            directory ``TRT_Rank_obs_pred.pkl`` is written to.

    Raises:
        ValueError: If the features of a lead-time specific model are not the
            top-gain features of the model fitted with all features.
    """
    cmap_pred_dt = plt.get_cmap('viridis_r')

    ## Import dictionary with selected models:
    # NOTE(security): pickle.load can execute arbitrary code; the model files
    # must come from a trusted location.
    train_path_name = os.path.join(
        model_path, "model_dict_t0diff_maxdepth6_selfeat_gain.pkl")
    with open(train_path_name, "rb") as file:
        dict_sel_model = pickle.load(file)

    plt.close()
    fig = plt.figure(num=1, figsize=(7, 6))

    # Per-lead-time accumulators, filled inside the loop below.
    X_test_ls = []
    Rank_obs_ls = []
    Rank_pred_XGB_ls = []
    Rank_pred_XGB_PM_ls = []
    df_param_ls_diff = []
    df_param_ls_rank = []
    df_param_ls_rank_PM = []
    df_param_ls_rank_pers = []

    ## Loop over lead times:
    n_lead_times = len(ls_pred_dt)
    for i, pred_dt in enumerate(ls_pred_dt):
        X_train, X_test, y_train, y_test = ipt.get_model_input(
            df_nonnan,
            del_TRTeqZero_tpred=True,
            split_Xy_traintest=True,
            X_normalise=False,
            pred_dt=pred_dt,
            check_for_nans=False,
            verbose=True)
        # Only the test split is needed; release the training data early.
        del (X_train, y_train)
        X_test_ls.append(X_test)

        ## Load XGB model fitted to all features:
        with open(
                os.path.join(model_path,
                             "model_%i_t0diff_maxdepth6.pkl" % pred_dt),
                "rb") as file:
            xgb_model_feat = pickle.load(file)

        top_features = pd.DataFrame.from_dict(
            xgb_model_feat.get_booster().get_score(importance_type='gain'),
            orient="index",
            columns=["F_score"]).sort_values(by=['F_score'], ascending=False)

        ## Get specific predictive model for this leadtime:
        pred_model = dict_sel_model["pred_mod_%i" % pred_dt]

        ## Check that features agree:
        features_pred_model = pred_model.get_booster().feature_names
        n_features = len(features_pred_model)
        if set(features_pred_model) != set(top_features.index[:n_features]):
            raise ValueError(
                "Features of predictive model and top features of model fitted with all features do not agree"
            )

        ## Make prediction of TRT Rank differences:
        TRT_diff_pred = pred_model.predict(X_test[features_pred_model])

        ## Get set of different TRT Rank predictions:
        Rank_obs, Rank_pred_XGB, Rank_pred_XGB_PM, Rank_pred_pers, Rank_pred_pers_PM, \
            Rank_pred_diff, Diff_pred_XGB = get_obs_fcst_TRT_Rank(X_test["TRT_Rank|0"], TRT_diff_pred,
                                                                  y_test, X_test["TRT_Rank|-5"])
        Rank_obs_ls.append(Rank_obs)
        Rank_pred_XGB_ls.append(Rank_pred_XGB)
        Rank_pred_XGB_PM_ls.append(Rank_pred_XGB_PM)

        ## Plot scatterplots obs vs. predicted:
        plot_pred_vs_obs_core(y_test,
                              Diff_pred_XGB.values,
                              pred_dt,
                              "_XGB%i" % n_features,
                              cfg_tds,
                              outtype="TRT_Rank_diff")
        plot_pred_vs_obs_core(Rank_obs,
                              Rank_pred_XGB.values,
                              pred_dt,
                              "_XGB%i" % n_features,
                              cfg_tds,
                              outtype="TRT_Rank")
        plot_pred_vs_obs_core(Rank_obs,
                              Rank_pred_XGB_PM.values,
                              pred_dt,
                              "_XGB%i-ProbMatch" % n_features,
                              cfg_tds,
                              outtype="TRT_Rank")
        plot_pred_vs_obs_core(Rank_obs,
                              Rank_pred_pers.values,
                              pred_dt,
                              "_Pers",
                              cfg_tds,
                              outtype="TRT_Rank")
        plot_pred_vs_obs_core(Rank_obs,
                              Rank_pred_pers_PM.values,
                              pred_dt,
                              "_Pers-ProbMatch",
                              cfg_tds,
                              outtype="TRT_Rank")
        plot_pred_vs_obs_core(Rank_obs,
                              Rank_pred_diff.values,
                              pred_dt,
                              "_ConstDiff",
                              cfg_tds,
                              outtype="TRT_Rank")

        ## Calculate different term elements for R^2 / Brier Score calculation:
        df_param_ls_diff.append(
            get_R2_param(y_test.values, Diff_pred_XGB.values))
        df_param_ls_rank.append(
            get_R2_param(Rank_obs.values, Rank_pred_XGB.values))
        df_param_ls_rank_PM.append(
            get_R2_param(Rank_obs.values, Rank_pred_XGB_PM.values))
        df_param_ls_rank_pers.append(
            get_R2_param(Rank_obs.values, Rank_pred_pers.values))

        ## Calculate statistics for Taylor Diagram:
        # Only the three predictions shown in the diagram are evaluated.
        stat_pred_XGB = sm.taylor_statistics(predicted=Rank_pred_XGB.values,
                                             reference=Rank_obs.values)
        stat_pred_XGB_PM = sm.taylor_statistics(
            predicted=Rank_pred_XGB_PM.values, reference=Rank_obs.values)
        stat_pred_pred_pers = sm.taylor_statistics(
            predicted=Rank_pred_pers.values, reference=Rank_obs.values)

        # Element 0 is the reference entry, element 1 the prediction entry.
        sdev, crmsd, ccoef = (np.array([
            stat_pred_XGB[key][0], stat_pred_XGB[key][1],
            stat_pred_XGB_PM[key][1], stat_pred_pred_pers[key][1]
        ]) for key in ('sdev', 'crmsd', 'ccoef'))

        ## Plot Taylor Diagram:
        col_point = cmap_pred_dt(float(i) / n_lead_times)
        col_point = (col_point[0], col_point[1], col_point[2], 0.8)

        # The first lead time creates the diagram; later ones draw on top.
        plot_markerLegend = 'on'
        plot_overlay = 'off' if i == 0 else 'on'

        if i == n_lead_times - 1:
            # The last lead time carries the legend names of the predictions.
            plot_markerLabelColor = None
            plot_markerLabel = ["Obs", "XGB", "XGB (PM)", "Persistance"]
        else:
            plot_markerLabelColor = "black"
            plot_markerLabel = ["Obs", "+%imin" % pred_dt, "", ""]

        # NOTE(review): sdev is normalised by the reference standard deviation
        # but crmsd is passed unnormalised - confirm this is intended.
        sm.taylor_diagram(
            sdev / sdev[0],
            crmsd,
            ccoef,
            styleOBS='-',
            colOBS='darkred',
            markerobs='o',
            titleOBS='Obs',
            markerLabel=plot_markerLabel,
            markerLabelColor=plot_markerLabelColor,
            alpha=0.1,
            markerColor=col_point,
            markerLegend=plot_markerLegend,
            axismax=1.2,
            markerSize=5,
            colRMS='grey',
            styleRMS='--',
            widthRMS=0.8,
            rincRMS=0.25,
            tickRMS=np.arange(0.25, 1.5, 0.25),
            colSTD='grey',
            styleSTD='-.',
            widthSTD=0.8,
            colCOR='grey',
            styleCOR=':',
            widthCOR=0.8,
            overlay=plot_overlay)

    ## Save Taylor Diagram:
    get_time_delta_colorbar(fig, ls_pred_dt, cmap_pred_dt,
                            [0.7, 0.5, 0.05, 0.3])
    plt.savefig(
        os.path.join(cfg_tds["fig_output_path"], "Taylor_Diagram_cmap.pdf"))
    plt.close()

    ## Plot histogram showing the effect of probability matching:
    print(
        "Save dataframe with observed, predicted, and predicted & PM TRT Ranks"
    )
    Rank_obs_df = pd.concat(Rank_obs_ls, axis=1, sort=True)
    Rank_obs_df.columns = [
        "TRT_Rank_obs|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_pred_XGB_df = pd.concat(Rank_pred_XGB_ls, axis=1, sort=True)
    Rank_pred_XGB_df.columns = [
        "TRT_Rank_pred|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_pred_XGB_PM_df = pd.concat(Rank_pred_XGB_PM_ls, axis=1, sort=True)
    Rank_pred_XGB_PM_df.columns = [
        "TRT_Rank_pred_PM|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_obs_pred_df = pd.concat(
        [Rank_obs_df, Rank_pred_XGB_df, Rank_pred_XGB_PM_df],
        axis=1,
        sort=True)

    ## Get dataframe with observed, predicted, and predicted & PM TRT Ranks for operational PM:
    op_path_name = os.path.join(cfg_op["XGB_model_path"],
                                "TRT_Rank_obs_pred.pkl")
    with open(op_path_name, "wb") as file:
        pickle.dump(Rank_obs_pred_df, file, protocol=2)
    print(" saved dict to 'XGB_model_path' location:\n %s" % op_path_name)
    prt_txt = """
    ---------------------------------------------------------------------------------
        The file 'TRT_Rank_obs_pred.pkl' in the
        directory '%s'
        is now used for the operational probability matching procedure, be aware of that!
    ---------------------------------------------------------------------------------\n""" % (
        cfg_op["XGB_model_path"])
    print(prt_txt)

    ## Plot skill scores as function of lead-time:
    lead_time_index = np.array(ls_pred_dt)
    df_R2_param_rank = pd.concat(df_param_ls_rank,
                                 axis=0).set_index(lead_time_index)
    df_R2_param_rank_PM = pd.concat(df_param_ls_rank_PM,
                                    axis=0).set_index(lead_time_index)
    df_R2_param_diff = pd.concat(df_param_ls_diff,
                                 axis=0).set_index(lead_time_index)
    df_R2_param_rank_pers = pd.concat(df_param_ls_rank_pers,
                                      axis=0).set_index(lead_time_index)
    plot_stats(df_R2_param_rank, "TRT_Rank", cfg_tds)
    plot_stats(df_R2_param_diff, "TRT_Rank_diff", cfg_tds)
    plot_stats_nice(df_R2_param_rank, "TRT_Rank", cfg_tds)
    plot_stats_nice(df_R2_param_diff, "TRT_Rank_diff", cfg_tds)
    plot_stats_nice(df_R2_param_rank_pers, "TRT_Rank_pers", cfg_tds)
    plot_stats_nice(df_R2_param_rank_PM, "TRT_Rank_PM", cfg_tds)

    ## Print IDs of long TRT cells in testing dataset:
    print(
        "\nThese are the IDs of long TRT cells (>25 time steps) in the testing dataset:"
    )
    # The cell ID is the part of the index label after the first 13 characters.
    TRT_ID = [TRT_ID_i[13:] for TRT_ID_i in X_test_ls[-1].index.values]
    # most_common() orders by descending count and keeps first-seen order
    # for ties.
    TRT_ID_count_sort = Counter(TRT_ID).most_common()
    TRT_ID_count_sort_pd = pd.DataFrame(np.array(TRT_ID_count_sort),
                                        columns=["TRT_ID", "Count"])
    # astype returns a new Series and has no `inplace` option.
    TRT_ID_count_sort_pd["Count"] = TRT_ID_count_sort_pd["Count"].astype(
        np.uint16)
    TRT_ID_long = TRT_ID_count_sort_pd.loc[TRT_ID_count_sort_pd["Count"] > 25]
    print(TRT_ID_long)

    TRT_ID_casestudy = [
        "2018080721250094", "2018080721300099", "2018080711400069",
        "2018080710200036"
    ]
    print(" Making analysis for TRT IDs (hardcoded!): %s" % TRT_ID_casestudy)

    TRT_ID_long_sel = TRT_ID_long.loc[TRT_ID_long['TRT_ID'].isin(
        TRT_ID_casestudy)]

    # One feature per data source (column) and per lead time 10/20/30 (row),
    # written as "<variable>|<time delta>|<statistic>".
    df_feature_ts_plot = pd.DataFrame.from_dict({
        "Radar":
        ["CZC_lt57dBZ|-45|SUM", "CZC_lt57dBZ|-45|SUM", "CZC_lt57dBZ|-45|SUM"],
        "Satellite": [
            "IR_097_stat|-20|PERC05", "IR_097_stat|-15|PERC01",
            "IR_097_stat|-20|MIN"
        ],
        "COSMO": [
            "CAPE_MU_stat|-10|PERC50", "CAPE_MU_stat|-5|PERC75",
            "CAPE_ML_stat|0|SUM"
        ],
        "Lightning": [
            "THX_densIC_stat|-30|SUM", "THX_curr_pos_stat|-40|SUM",
            "THX_curr_pos_stat|-30|SUM"
        ]
    })
    sources = ["Radar", "Satellite", "COSMO", "Lightning"]

    for i_sel in range(len(TRT_ID_long_sel)):
        cell = TRT_ID_long_sel.iloc[i_sel]
        print(" Working on cell %s" % cell["TRT_ID"])
        plot_pred_time_series(cell, df_nonnan, Rank_pred_XGB_ls, ls_pred_dt,
                              cfg_tds)
        plot_pred_time_series(cell,
                              df_nonnan,
                              Rank_pred_XGB_PM_ls,
                              ls_pred_dt,
                              cfg_tds,
                              path_addon="PM",
                              title_addon=" (PM)")
        plot_var_time_series_dt0_multiquant(cell, df_nonnan, cfg_tds)

        for i_pred_dt, pred_dt in enumerate([10, 20, 30]):
            fig = plt.figure(figsize=[10, 6])
            # 2x2 grid, one panel per data source.
            ax_ls = [fig.add_subplot(2, 2, pos) for pos in range(1, 5)]
            for i_source, source in enumerate(sources):
                ls_feat_param = df_feature_ts_plot[source].iloc[
                    i_pred_dt].split("|")
                dt_highlight = int(ls_feat_param[1])
                past_dt = np.arange(-45, 0, 5) if dt_highlight != 0 else [0]
                ax_ls[i_source] = plot_var_time_series(
                    cell,
                    df_nonnan,
                    ls_feat_param[0],
                    ls_feat_param[2],
                    past_dt=past_dt,
                    dt_highlight=dt_highlight,
                    ax=ax_ls[i_source])
            plt.tight_layout()
            plt.savefig(
                os.path.join(
                    cfg_tds["fig_output_path"], "Feat_series_%i_%s.pdf" %
                    (pred_dt, cell["TRT_ID"])))
            plt.close()