def taylor_count(path_in, file_ref, varname_ref, file_vr, varname_vr, file_rcm,
                 varname_rcm):
    """Compute normalized Taylor-diagram statistics for the VR and RCM models.

    For each model the function compares time-mean fields against the
    reference dataset, once per month (April to August) and once over the
    whole record, and normalizes the results by the reference standard
    deviation.

    Parameters
    ----------
    path_in : str
        Directory that contains all three data files.
    file_ref : str
        File name of the reference dataset (time dimension ``time``).
    varname_ref : str
        Name of the variable to read from the reference dataset.
    file_vr, file_rcm : str
        File names of the VR and RCM model datasets (time dimension ``Time``).
    varname_vr, varname_rcm : str
        Names of the variables to read from the VR and RCM datasets.

    Returns
    -------
    dict
        ``{"vr": {...}, "rcm": {...}}``; each inner dict has the keys
        ``"sdev"``, ``"crmsd"`` and ``"ccoef"`` holding 1-D numpy arrays of
        length 7: the reference point, months 4-8, and the whole record.

    Raises
    ------
    ValueError
        If a model field and the reference field do not have the same number
        of grid points (they must share a grid to be compared point-wise).
    """
    import xarray as xr
    import numpy as np
    import skill_metrics as sm

    month_keys = ["4", "5", "6", "7", "8"]
    model_files = {"vr": file_vr, "rcm": file_rcm}
    # Each model is read with its own variable name.
    model_vars = {"vr": varname_vr, "rcm": varname_rcm}

    def _taylor_stats(model_field, obs_field):
        """Flatten both fields, drop points missing in either, get the stats."""
        pred = model_field.values.ravel()
        ref = obs_field.values.ravel()
        if pred.shape != ref.shape:
            raise ValueError(
                "model and reference fields have different sizes: "
                "%d vs %d" % (pred.size, ref.size))
        # A joint mask keeps model/reference values paired; masking each array
        # on its own would shift the pairs whenever the NaN patterns differ.
        valid = ~(np.isnan(pred) | np.isnan(ref))
        return sm.taylor_statistics(pred[valid], ref[valid])

    model_stats = {}  # model type -> {month key or 'all' -> statistics}
    with xr.open_dataset(path_in + "/" + file_ref) as ds_obs:
        obs = ds_obs[varname_ref]
        for model_type, file_name in model_files.items():
            with xr.open_dataset(path_in + "/" + file_name) as ds_model:
                field = ds_model[model_vars[model_type]]
                stats = {}
                # statistics of the monthly mean fields
                for key in month_keys:
                    month = int(key)
                    obs_mean = obs.loc[
                        ds_obs.time.dt.month == month].mean(dim=["time"])
                    model_mean = field.loc[
                        ds_model.Time.dt.month == month].mean(dim=["Time"])
                    stats[key] = _taylor_stats(model_mean, obs_mean)
                # statistics of the mean over the whole record
                stats['all'] = _taylor_stats(field.mean(dim=["Time"]),
                                             obs.mean(dim=["time"]))
            model_stats[model_type] = stats

    # Collect the normalized statistics in plotting order.
    model_plot = {}
    for model_type, stats in model_stats.items():
        # Reference point: after normalization it is sdev=1, crmsd=0, ccoef=1
        # whichever entry it is read from; the last month is used here.
        ref_stats = stats[month_keys[-1]]
        ref_sdev = ref_stats['sdev'][0]
        sdev = [ref_stats['sdev'][0] / ref_sdev]
        crmsd = [ref_stats['crmsd'][0] / ref_sdev]
        ccoef = [ref_stats['ccoef'][0]]

        # Months 4-8 followed by the whole record, each normalized by the
        # standard deviation of its own reference field.
        for key in month_keys + ["all"]:
            entry = stats[key]
            sdev.append(entry['sdev'][1] / entry['sdev'][0])
            crmsd.append(entry['crmsd'][1] / entry['sdev'][0])
            ccoef.append(entry['ccoef'][1])

        model_plot[model_type] = {
            "sdev": np.array(sdev),
            "crmsd": np.array(crmsd),
            "ccoef": np.array(ccoef),
        }

    return model_plot
# Example #2
# 0
        self.ref = ref


if __name__ == '__main__':
    # Script entry point: load the example data and compute Taylor statistics
    # for three predicted series against one common reference series.

    # Close any previously open graphics windows
    # ToDo: fails to work within Eclipse
    plt.close('all')

    # Read data from pickle file
    data = load_obj('taylor_data')

    # Calculate statistics for Taylor diagram
    # The first array element corresponds to the reference series,
    # while the second is that for the predicted series.
    taylor_stats1 = sm.taylor_statistics(data.pred1, data.ref, 'data')
    taylor_stats2 = sm.taylor_statistics(data.pred2, data.ref, 'data')
    taylor_stats3 = sm.taylor_statistics(data.pred3, data.ref, 'data')

    # Store statistics in arrays: element 0 is the reference value (read from
    # the first result), followed by one value per predicted series.
    sdev = np.array([
        taylor_stats1['sdev'][0], taylor_stats1['sdev'][1],
        taylor_stats2['sdev'][1], taylor_stats3['sdev'][1]
    ])
    crmsd = np.array([
        taylor_stats1['crmsd'][0], taylor_stats1['crmsd'][1],
        taylor_stats2['crmsd'][1], taylor_stats3['crmsd'][1]
    ])
    ccoef = np.array([
        taylor_stats1['ccoef'][0], taylor_stats1['ccoef'][1],
        taylor_stats2['ccoef'][1], taylor_stats3['ccoef'][1]
# Example #3
# 0
#!/usr/bin/python

import matplotlib.pyplot as plt
import numpy as np
import pickle
import skill_metrics as sm
from sys import version_info

# Observed series and two model series of the same length.
dataobs = [0, 0, 0, 0, 0, 0, 0, 10, 44, 9, 0, 0, 0, 0, 0, 0, 0, 0]
datamodel = [0, 0, 0, 0, 0, 0, 2.2, 9.5, 0.4, 0, 0, 0, 0, 0, 0, 0, 0, 0]
datamodel2 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.1, 0, 0]

# Taylor statistics of each model series against the observations.
test = sm.taylor_statistics(datamodel, dataobs, 'data')
test2 = sm.taylor_statistics(datamodel2, dataobs, 'data')

# One array per statistic: the reference value (index 0 of the first result)
# followed by the value of each model series (index 1 of each result).
sdev, crmsd, ccoef = (
    np.array([test[key][0], test[key][1], test2[key][1]])
    for key in ('sdev', 'crmsd', 'ccoef')
)

# Draw the Taylor diagram and display it.
sm.taylor_diagram(sdev, crmsd, ccoef)
plt.show()
# Write `table` to a comma-separated file, formatting every entry as a string.
np.savetxt(path + 'table_regularization.csv', table, delimiter=",", fmt='%s')

# ------------------------------------------------------------------------------
# -------------------------------- PLOT ----------------------------------------
# ------------------------------------------------------------------------------

# Set the figure properties (optional)
rcParams["figure.figsize"] = [10.0, 8]
rcParams['lines.linewidth'] = 1.25 # line width for plots
rcParams.update({'font.size': 13}) # font size of axes text

# Close any previously open graphics windows
# ToDo: fails to work within Eclipse
plt.close('all')
# Taylor-diagram statistics of each estimate, with True_fM passed as the
# reference (second) argument.
taylor_stats1  = sm.taylor_statistics(MWF_nnls, True_fM)
taylor_stats2  = sm.taylor_statistics(MWF_X2_I, True_fM)
taylor_stats3  = sm.taylor_statistics(MWF_X2_L1, True_fM)
taylor_stats4  = sm.taylor_statistics(MWF_X2_L2, True_fM)
taylor_stats5  = sm.taylor_statistics(MWF_Lcurve_I, True_fM)
taylor_stats6  = sm.taylor_statistics(MWF_Lcurve_L1, True_fM)
taylor_stats7  = sm.taylor_statistics(MWF_Lcurve_L2, True_fM)
taylor_stats8  = sm.taylor_statistics(MWF_GCV_I, True_fM)
taylor_stats9  = sm.taylor_statistics(MWF_GCV_L1, True_fM)
taylor_stats10 = sm.taylor_statistics(MWF_GCV_L2, True_fM)

# Target-diagram statistics for the same estimates, again against True_fM.
target_stats1  = sm.target_statistics(MWF_nnls, True_fM)
target_stats2  = sm.target_statistics(MWF_X2_I, True_fM)
target_stats3  = sm.target_statistics(MWF_X2_L1, True_fM)
target_stats4  = sm.target_statistics(MWF_X2_L2, True_fM)
target_stats5  = sm.target_statistics(MWF_Lcurve_I, True_fM)
# Marker labels: one for the observation followed by one per model.
labels = ['Non-Dimensional Observation', 'POI', 'NB', 'ZIP', "ZINB", "RF"]

# Predictions of the five models are stored as the columns of `m`.
pred_poi = m[:, 0]
pred_nb = m[:, 1]
pred_zip = m[:, 2]
pred_zinb = m[:, 3]
pred_rf = m[:, 4]

# Target-diagram statistics of every model against `t`.
target_mod_poi = sm.target_statistics(pred_poi, t, 'data')
target_mod_nb = sm.target_statistics(pred_nb, t, 'data')
target_mod_zip = sm.target_statistics(pred_zip, t, 'data')
target_mod_zinb = sm.target_statistics(pred_zinb, t, 'data')
target_mod_rf = sm.target_statistics(pred_rf, t, 'data')

# Taylor-diagram statistics of every model against `t`.
taylor_mod_poi = sm.taylor_statistics(pred_poi, t, 'data')
taylor_mod_nb = sm.taylor_statistics(pred_nb, t, 'data')
taylor_mod_zip = sm.taylor_statistics(pred_zip, t, 'data')
taylor_mod_zinb = sm.taylor_statistics(pred_zinb, t, 'data')
taylor_mod_rf = sm.taylor_statistics(pred_rf, t, 'data')

# Gather each target statistic into one array, in the model order used above.
target_bias = np.array([
    target_mod_poi['bias'], target_mod_nb['bias'], target_mod_zip['bias'],
    target_mod_zinb["bias"], target_mod_rf["bias"]
])
target_crmsd = np.array([
    target_mod_poi['crmsd'], target_mod_nb['crmsd'], target_mod_zip['crmsd'],
    target_mod_zinb["crmsd"], target_mod_rf["crmsd"]
])
target_rmsd = np.array([
    target_mod_poi['rmsd'], target_mod_nb['rmsd'], target_mod_zip['rmsd'],
# Example #6
# 0
def main():
    """Compare LAI products over Kenya and Tanzania in a Taylor diagram.

    Reads the BNU observation file as the reference, every ``*nc4`` model file
    in the BG1 directory, two multi-model mean files and the LAI3G product.
    All series are cut to the Kenya and Tanzania boxes, flattened and
    concatenated; Taylor statistics are computed on the grid cells where the
    reference is positive, sorted by correlation, printed, plotted and saved
    to ``taylor_BG1_BNU_LAI3G.png``.

    All input paths are hard-coded.
    """
    # Reference observations (BNU product).
    # NOTE(review): `slice` must be a project helper that shadows the builtin
    # (it is unpacked into four index bounds) - confirm where it is defined.
    with netCDF4.Dataset('BNU_2000-2010.nc4') as f:
        latt, lonn = f.variables['latitude'][:], f.variables['longitude'][:]

        # index bounds of the Kenya box
        latli_k, latui_k, lonli_k, lonui_k = slice([-4.75, 4.75],
                                                   [33.25, 41.75], latt, lonn)
        # index bounds of the Tanzania box
        latli_t, latui_t, lonli_t, lonui_t = slice([-11.75, -1.25],
                                                   [28.25, 40.75], latt, lonn)

        # observation series: Kenya followed by Tanzania, flattened
        obs_lai = f.variables['LAI']
        fSubset = np.concatenate([
            obs_lai[:, latli_k:latui_k, lonli_k:lonui_k].flatten(),
            obs_lai[:, latli_t:latui_t, lonli_t:lonui_t].flatten()
        ])

    # Import all BG1 model files, subset to 2000-2010.
    model_dict = {}
    mypath = '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1'
    model_files = [
        entry for entry in listdir(mypath)
        if isfile(join(mypath, entry)) and entry.endswith('nc4')
    ]
    start_date = datetime.datetime(2000, 1, 1)
    for file_name in model_files:
        print(file_name)
        parts = file_name.split('_')
        name = parts[0] + parts[-1]  # tidy up the name
        with netCDF4.Dataset(mypath + str('/') + file_name) as f_model:
            f_model.set_auto_mask(False)  # missing value = -9999.0
            time_var = f_model.variables['time']
            dates = num2date(time_var[:], time_var.units)
            # Positions, in the full time axis, of the steps after start_date.
            # The index must come from the boolean mask itself: applying
            # np.where to the already filtered dates yields 0..k-1, i.e. the
            # first k steps of the file instead of the selected ones.
            # NOTE(review): the comparison is strict, so a step stamped
            # exactly 2000-01-01 00:00 is excluded - confirm this is intended.
            time_idx = np.where(dates > start_date)[0]
            model_lai = f_model.variables['LAI']
            f_model_sub = np.concatenate([
                model_lai[time_idx, latli_k:latui_k,
                          lonli_k:lonui_k].flatten(),
                model_lai[time_idx, latli_t:latui_t,
                          lonli_t:lonui_t].flatten()
            ])
        # set missing value to 0
        f_model_sub[f_model_sub == -9999.0] = 0
        model_dict[name] = f_model_sub

    # add multi-model mean slices to the model dictionary
    model_dict['BG1_mean'] = multi_model_mean_slice(
        '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1/BG1_mean.txt')
    model_dict['SG3_mean'] = multi_model_mean_slice(
        '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/BG1/SG3_mean.txt')

    # add LAI3G (another observation product) for comparison;
    # subset 2000-2010 (last 132 time steps), Kenya & Tanzania
    with netCDF4.Dataset(
            '/Users/nhuang37/Desktop/NYU DS/Yr1 Summer/Data/LAI/Taylor Diagram/LAI3G_regrid.nc'
    ) as LAI3G:
        lai3g = LAI3G.variables['LAI']
        model_dict['LAI3G'] = np.concatenate([
            lai3g[-132:, latli_k:latui_k, lonli_k:lonui_k].flatten(),
            lai3g[-132:, latli_t:latui_t, lonli_t:lonui_t].flatten()
        ]) / 1000  # divide by 1000 to rescale

    # Make the Taylor Diagram
    # Set the figure properties (optional)
    rcParams["figure.figsize"] = [8.0, 6.4]
    rcParams['lines.linewidth'] = 1  # line width for plots
    rcParams.update({'font.size': 8})  # font size of axes text

    # Calculate statistics for the Taylor diagram. Element 0 of every
    # statistic belongs to the reference series and element 1 to the
    # predicted series; the reference entry is stored once, taken from the
    # first model's result.
    sdev, crmsd, ccoef = [], [], []
    valid = fSubset > 0  # 0 marks ocean cells, not useful for LAI correlation
    for model in model_dict:
        taylor_stats = sm.taylor_statistics(model_dict[model][valid],
                                            fSubset[valid])
        if not sdev:
            sdev.append(taylor_stats['sdev'][0])
            crmsd.append(taylor_stats['crmsd'][0])
            ccoef.append(taylor_stats['ccoef'][0])
        sdev.append(taylor_stats['sdev'][1])
        crmsd.append(taylor_stats['crmsd'][1])
        ccoef.append(taylor_stats['ccoef'][1])
    sdev, crmsd, ccoef = np.array(sdev), np.array(crmsd), np.array(ccoef)

    # labels: the reference first, then the models in dictionary order
    label = list(model_dict.keys())
    label.insert(0, 'obs')
    # sort all entries by correlation coefficient, highest first
    result = sorted(zip(label, sdev, crmsd, ccoef),
                    key=lambda x: x[3],
                    reverse=True)
    # unzip the result in sorted order
    label, sdev, crmsd, ccoef = zip(*result)
    sdev, crmsd, ccoef = np.array(sdev), np.array(crmsd), np.array(ccoef)
    # print out the result
    print(label, ccoef)

    # plot Taylor Diagram
    sm.taylor_diagram(sdev,
                      crmsd,
                      ccoef,
                      markerLabel=list(label),
                      markerLabelColor='r',
                      markerLegend='on',
                      markerColor='r',
                      styleOBS='-',
                      colOBS='r',
                      markerobs='o',
                      markerSize=6,
                      tickRMS=[0.0, 1.0, 2.0, 3.0],
                      tickRMSangle=115,
                      showlabelsRMS='on',
                      titleRMS='on',
                      titleOBS='Ref',
                      checkstats='on')
    plt.savefig('taylor_BG1_BNU_LAI3G.png', dpi=300)
# Example #7
# 0
        transform=ax.transAxes,
        fontsize=14,
        verticalalignment='top',
        bbox=props)
plt.show()
fig4.savefig(output_path + os.sep +
             'Comp_OMET_26.5N_RAPID_hindcast_time_series.jpg',
             dpi=400)

# Banner for the Taylor diagram section. print() is called with a single
# parenthesised string so the output is the same on Python 2 and Python 3.
print('*******************************************************************')
print('**********************    Taylor Diagram    ***********************')
print('*******************************************************************')
# statistical operation inside skill_metrics function
# the time series must have the same size, hence the slicing below
# RAPID array observation is taken as reference
taylor_stats1 = sm.taylor_statistics(OMET_RAPID_monthly, OMET_RAPID_monthly)
taylor_stats2 = sm.taylor_statistics(OMET_ORAS4_RAPID_series[3:],
                                     OMET_RAPID_monthly[:-10])
taylor_stats3 = sm.taylor_statistics(OMET_GLORYS2V3_RAPID_series[3:],
                                     OMET_RAPID_monthly[:-10])
taylor_stats4 = sm.taylor_statistics(OMET_SODA3_RAPID_series[3:-2],
                                     OMET_RAPID_monthly)
taylor_stats5 = sm.taylor_statistics(OMET_hindcast_series[46 * 12 + 3:],
                                     OMET_RAPID_monthly[:-34])
# Store statistics in arrays
# Specify labels for points in a cell array (M1 for model prediction 1,
# etc.). Note that a label needs to be specified for the reference even
# though it is not used.
label = [
    'RAPID ARRAY Obs', 'RAPID ARRAY', 'ORAS4', 'GLORYS2V3', 'SODA3',
    'NEMO ORCA'
# Example #8
# 0
def make_model_evaluation(df_nonnan, model_path, ls_pred_dt, cfg_tds, cfg_op):
    """Evaluate the lead-time specific XGB models on the test split.

    For every lead time in ``ls_pred_dt`` the function loads the pickled
    models, predicts the TRT Rank difference on the test data, draws
    observed-vs-predicted scatter plots, collects R^2 parameters and overlays
    one set of points on a shared Taylor diagram. Afterwards it saves the
    Taylor diagram, pickles the observed/predicted TRT Ranks for the
    operational probability matching, plots skill scores against lead time
    and draws time series for a hard-coded list of case-study TRT cells.

    Parameters
    ----------
    df_nonnan : pandas.DataFrame
        Input dataframe handed to ``ipt.get_model_input`` and to the
        time-series plotting helpers.
    model_path : str
        Directory containing the pickled models.
    ls_pred_dt : sequence of int
        Lead times to evaluate (labelled "+<n>min" in the Taylor diagram).
    cfg_tds : dict
        Configuration; ``cfg_tds["fig_output_path"]`` is where figures go.
    cfg_op : dict
        Configuration; ``cfg_op["XGB_model_path"]`` receives
        ``TRT_Rank_obs_pred.pkl``.

    Raises
    ------
    ValueError
        If the features of a predictive model are not the top features (by
        gain) of the model fitted to all features.
    """
    cmap_pred_dt = plt.get_cmap('viridis_r')

    ## Import dictionary with selected models:
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # use model files from a trusted location.
    train_path_name = os.path.join(
        model_path, "model_dict_t0diff_maxdepth6_selfeat_gain.pkl")
    with open(train_path_name, "rb") as model_file:
        dict_sel_model = pickle.load(model_file)

    plt.close()
    fig = plt.figure(num=1, figsize=(7, 6))

    ## Per-lead-time accumulators (one entry is appended per lead time):
    X_test_ls = []
    y_test_ls = []
    xgb_model_ls = []
    pred_model_ls = []
    Rank_obs_ls = []
    top_features_ls = []
    df_param_ls_diff = []
    df_param_ls_rank = []
    df_param_ls_rank_PM = []
    df_param_ls_rank_pers = []
    Rank_pred_XGB_ls = []
    Rank_pred_XGB_PM_ls = []

    ## Loop over lead times:
    for i, pred_dt in enumerate(ls_pred_dt):

        ## Get the test split for this lead time:
        X_train, X_test, y_train, y_test = ipt.get_model_input(
            df_nonnan,
            del_TRTeqZero_tpred=True,
            split_Xy_traintest=True,
            X_normalise=False,
            pred_dt=pred_dt,
            check_for_nans=False,
            verbose=True)
        # the training split is not needed here; release it right away
        del X_train, y_train
        X_test_ls.append(X_test)
        y_test_ls.append(y_test)

        ## Load XGB model fitted to all features:
        with open(
                os.path.join(model_path,
                             "model_%i_t0diff_maxdepth6.pkl" % pred_dt),
                "rb") as model_file:
            xgb_model_feat = pickle.load(model_file)
        xgb_model_ls.append(xgb_model_feat)

        top_features = pd.DataFrame.from_dict(
            xgb_model_feat.get_booster().get_score(importance_type='gain'),
            orient="index",
            columns=["F_score"]).sort_values(by=['F_score'], ascending=False)
        top_features_ls.append(top_features)

        ## Get specific predictive model for this leadtime:
        pred_model = dict_sel_model["pred_mod_%i" % pred_dt]
        pred_model_ls.append(pred_model)

        ## Check that features agree:
        features_pred_model = pred_model.get_booster().feature_names
        n_features = len(features_pred_model)
        if set(features_pred_model) != set(top_features.index[:n_features]):
            raise ValueError(
                "Features of predictive model and top features of model fitted with all features do not agree"
            )

        ## Make prediction of TRT Rank differences:
        TRT_diff_pred = pred_model.predict(X_test[features_pred_model])

        ## Get set of different TRT Rank predictions:
        Rank_obs, Rank_pred_XGB, Rank_pred_XGB_PM, Rank_pred_pers, Rank_pred_pers_PM, \
            Rank_pred_diff, Diff_pred_XGB = get_obs_fcst_TRT_Rank(X_test["TRT_Rank|0"], TRT_diff_pred, y_test, X_test["TRT_Rank|-5"])
        Rank_obs_ls.append(Rank_obs)
        Rank_pred_XGB_ls.append(Rank_pred_XGB)
        Rank_pred_XGB_PM_ls.append(Rank_pred_XGB_PM)

        ## Plot scatterplots obs vs. predicted:
        plot_pred_vs_obs_core(y_test,
                              Diff_pred_XGB.values,
                              pred_dt,
                              "_XGB%i" % n_features,
                              cfg_tds,
                              outtype="TRT_Rank_diff")
        rank_scatter_plots = [
            (Rank_pred_XGB, "_XGB%i" % n_features),
            (Rank_pred_XGB_PM, "_XGB%i-ProbMatch" % n_features),
            (Rank_pred_pers, "_Pers"),
            (Rank_pred_pers_PM, "_Pers-ProbMatch"),
            (Rank_pred_diff, "_ConstDiff"),
        ]
        for rank_pred, name_addon in rank_scatter_plots:
            plot_pred_vs_obs_core(Rank_obs,
                                  rank_pred.values,
                                  pred_dt,
                                  name_addon,
                                  cfg_tds,
                                  outtype="TRT_Rank")

        ## Calculate different term elements for R^2 / Brier Score calculation:
        df_param_ls_diff.append(
            get_R2_param(y_test.values, Diff_pred_XGB.values))
        df_param_ls_rank.append(
            get_R2_param(Rank_obs.values, Rank_pred_XGB.values))
        df_param_ls_rank_PM.append(
            get_R2_param(Rank_obs.values, Rank_pred_XGB_PM.values))
        df_param_ls_rank_pers.append(
            get_R2_param(Rank_obs.values, Rank_pred_pers.values))

        ## Calculate statistics for Taylor Diagram (XGB, XGB with
        ## probability matching, persistence):
        taylor_stats = [
            sm.taylor_statistics(predicted=rank_pred.values,
                                 reference=Rank_obs.values)
            for rank_pred in (Rank_pred_XGB, Rank_pred_XGB_PM, Rank_pred_pers)
        ]
        # Element 0 is the observation value (from the first result),
        # followed by one value per prediction.
        sdev, crmsd, ccoef = (
            np.array([taylor_stats[0][key][0]] +
                     [stat[key][1] for stat in taylor_stats])
            for key in ('sdev', 'crmsd', 'ccoef'))

        ## Plot Taylor Diagram:
        col_point = cmap_pred_dt(float(i) / len(ls_pred_dt))
        col_point = (col_point[0], col_point[1], col_point[2], 0.8)

        plot_markerLabel = ["Obs", "+%imin" % pred_dt, "", ""]
        plot_markerLabelColor = "black"
        plot_markerLegend = 'on'
        # the first lead time creates the diagram, later ones are overlaid
        plot_overlay = 'off' if i == 0 else 'on'
        if i > 0 and i == len(ls_pred_dt) - 1:
            # the last overlay carries the model names for the legend
            plot_markerLabelColor = None
            plot_markerLabel = ["Obs", "XGB", "XGB (PM)", "Persistance"]

        sm.taylor_diagram(
            sdev / sdev[0],
            crmsd,
            ccoef,
            styleOBS='-',
            colOBS='darkred',
            markerobs='o',
            titleOBS='Obs',
            markerLabel=plot_markerLabel,
            markerLabelColor=plot_markerLabelColor,
            alpha=0.1,
            markerColor=col_point,
            markerLegend=plot_markerLegend,
            axismax=1.2,
            markerSize=5,
            colRMS='grey',
            styleRMS='--',
            widthRMS=0.8,
            rincRMS=0.25,
            tickRMS=np.arange(0.25, 1.5, 0.25),
            colSTD='grey',
            styleSTD='-.',
            widthSTD=0.8,
            colCOR='grey',
            styleCOR=':',
            widthCOR=0.8,
            overlay=plot_overlay)

    ## Save Taylor Diagram:
    get_time_delta_colorbar(fig, ls_pred_dt, cmap_pred_dt,
                            [0.7, 0.5, 0.05, 0.3])
    plt.savefig(
        os.path.join(cfg_tds["fig_output_path"], "Taylor_Diagram_cmap.pdf"))
    plt.close()

    ## Collect observed, predicted, and predicted & PM TRT Ranks:
    print(
        "Save dataframe with observed, predicted, and predicted & PM TRT Ranks"
    )
    Rank_obs_df = pd.concat(Rank_obs_ls, axis=1, sort=True)
    Rank_obs_df.columns = [
        "TRT_Rank_obs|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_pred_XGB_df = pd.concat(Rank_pred_XGB_ls, axis=1, sort=True)
    Rank_pred_XGB_df.columns = [
        "TRT_Rank_pred|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_pred_XGB_PM_df = pd.concat(Rank_pred_XGB_PM_ls, axis=1, sort=True)
    Rank_pred_XGB_PM_df.columns = [
        "TRT_Rank_pred_PM|%i" % pred_dt for pred_dt in ls_pred_dt
    ]
    Rank_obs_pred_df = pd.concat(
        [Rank_obs_df, Rank_pred_XGB_df, Rank_pred_XGB_PM_df],
        axis=1,
        sort=True)

    ## Store that dataframe for the operational probability matching:
    op_path_name = os.path.join(cfg_op["XGB_model_path"],
                                "TRT_Rank_obs_pred.pkl")
    with open(op_path_name, "wb") as out_file:
        pickle.dump(Rank_obs_pred_df, out_file, protocol=2)
    print("  saved dict to 'XGB_model_path' location:\n    %s" % op_path_name)
    prt_txt = """
    ---------------------------------------------------------------------------------
        The file 'TRT_Rank_obs_pred.pkl' in the
        directory '%s'
        is now used for the operational probability matching procedure, be aware of
        that!
    ---------------------------------------------------------------------------------\n""" % (
        cfg_op["XGB_model_path"])
    print(prt_txt)

    ## Plot skill scores as function of lead-time:
    df_R2_param_rank = pd.concat(df_param_ls_rank,
                                 axis=0).set_index(np.array(ls_pred_dt))
    df_R2_param_rank_PM = pd.concat(df_param_ls_rank_PM,
                                    axis=0).set_index(np.array(ls_pred_dt))
    df_R2_param_diff = pd.concat(df_param_ls_diff,
                                 axis=0).set_index(np.array(ls_pred_dt))
    df_R2_param_rank_pers = pd.concat(df_param_ls_rank_pers,
                                      axis=0).set_index(np.array(ls_pred_dt))
    plot_stats(df_R2_param_rank, "TRT_Rank", cfg_tds)
    plot_stats(df_R2_param_diff, "TRT_Rank_diff", cfg_tds)
    plot_stats_nice(df_R2_param_rank, "TRT_Rank", cfg_tds)
    plot_stats_nice(df_R2_param_diff, "TRT_Rank_diff", cfg_tds)
    plot_stats_nice(df_R2_param_rank_pers, "TRT_Rank_pers", cfg_tds)
    plot_stats_nice(df_R2_param_rank_PM, "TRT_Rank_PM", cfg_tds)

    ## Print IDs of long TRT cells in testing dataset:
    print(
        "\nThese are the IDs of long TRT cells (>25 time steps) in the testing dataset:"
    )
    # The index entries of the last test set carry the TRT ID from character
    # 13 onwards; count how often every ID occurs.
    TRT_ID = X_test_ls[-1].index
    TRT_ID = [TRT_ID_i[13:] for TRT_ID_i in TRT_ID.values]
    TRT_ID_count = Counter(TRT_ID)
    # (ID, count) pairs, most frequent first
    TRT_ID_count_sort = TRT_ID_count.most_common()
    TRT_ID_count_sort_pd = pd.DataFrame(np.array(TRT_ID_count_sort),
                                        columns=["TRT_ID", "Count"])
    # astype returns a new Series and has no 'inplace' keyword; passing one
    # raises a TypeError with current pandas, so the result is just assigned.
    TRT_ID_count_sort_pd["Count"] = TRT_ID_count_sort_pd["Count"].astype(
        np.uint16)
    TRT_ID_long = TRT_ID_count_sort_pd.loc[TRT_ID_count_sort_pd["Count"] > 25]
    print(TRT_ID_long)

    TRT_ID_casestudy = [
        "2018080721250094", "2018080721300099", "2018080711400069",
        "2018080710200036"
    ]
    print("  Making analysis for TRT IDs (hardcoded!): %s" % TRT_ID_casestudy)

    TRT_ID_long_sel = TRT_ID_long.loc[TRT_ID_long['TRT_ID'].isin(
        TRT_ID_casestudy)]
    # Feature plotted per data source; row k belongs to the k-th lead time of
    # the case-study loop below (10, 20, 30).
    df_feature_ts_plot = pd.DataFrame.from_dict({
        "Radar":
        ["CZC_lt57dBZ|-45|SUM", "CZC_lt57dBZ|-45|SUM", "CZC_lt57dBZ|-45|SUM"],
        "Satellite": [
            "IR_097_stat|-20|PERC05", "IR_097_stat|-15|PERC01",
            "IR_097_stat|-20|MIN"
        ],
        "COSMO": [
            "CAPE_MU_stat|-10|PERC50", "CAPE_MU_stat|-5|PERC75",
            "CAPE_ML_stat|0|SUM"
        ],
        "Lightning": [
            "THX_densIC_stat|-30|SUM", "THX_curr_pos_stat|-40|SUM",
            "THX_curr_pos_stat|-30|SUM"
        ]
    })
    for i_sel in range(len(TRT_ID_long_sel)):
        cell_sel = TRT_ID_long_sel.iloc[i_sel]
        print("    Working on cell %s" % cell_sel["TRT_ID"])
        plot_pred_time_series(cell_sel, df_nonnan, Rank_pred_XGB_ls,
                              ls_pred_dt, cfg_tds)
        plot_pred_time_series(cell_sel,
                              df_nonnan,
                              Rank_pred_XGB_PM_ls,
                              ls_pred_dt,
                              cfg_tds,
                              path_addon="PM",
                              title_addon=" (PM)")

        plot_var_time_series_dt0_multiquant(cell_sel, df_nonnan, cfg_tds)

        for i_pred_dt, pred_dt in enumerate([10, 20, 30]):
            fig = plt.figure(figsize=[10, 6])
            # one panel per data source, in the order of the loop below
            ax_ls = [fig.add_subplot(2, 2, i_ax) for i_ax in (1, 2, 3, 4)]
            for i_source, source in enumerate(
                ["Radar", "Satellite", "COSMO", "Lightning"]):
                # feature string layout: "<variable>|<time delta>|<statistic>"
                ls_feat_param = df_feature_ts_plot[source].iloc[
                    i_pred_dt].split("|")
                past_dt = np.arange(-45, 0,
                                    5) if int(ls_feat_param[1]) != 0 else [0]
                ax_ls[i_source] = plot_var_time_series(
                    cell_sel,
                    df_nonnan,
                    ls_feat_param[0],
                    ls_feat_param[2],
                    past_dt=past_dt,
                    dt_highlight=int(ls_feat_param[1]),
                    ax=ax_ls[i_source])
            plt.tight_layout()
            plt.savefig(
                os.path.join(
                    cfg_tds["fig_output_path"], "Feat_series_%i_%s.pdf" %
                    (pred_dt, cell_sel["TRT_ID"])))
            plt.close()