import coalition3.inout.paths as pth
import coalition3.inout.readconfig as cfg
import coalition3.statlearn.fitting as fit
import coalition3.statlearn.inputprep as ipt

## Uncomment when running on Mac OS:
#os.environ['KMP_DUPLICATE_LIB_OK']='True'

## ============================================================================
## Get config info (of the three values returned by the operational config
## reader only the first one is kept):
cfg_tds = cfg.get_config_info_tds()
cfg_op, __, __ = cfg.get_config_info_op()

## Load training dataframe. If the script is called with exactly one command
## line argument, it is handed to the path reader, otherwise None is passed:
user_argv_path = sys.argv[1] if len(sys.argv)==2 else None
path_to_df = pth.file_path_reader("pandas training dataframe (nonnan)",user_argv_path)
print("\nLoading nonnan dataframe into RAM")
df_nonnan = pd.read_hdf(path_to_df,key="df_nonnan")

## Load list with models:
## NOTE(review): pickle.load can execute arbitrary code stored in the file,
## so the model lists must only be read from trusted locations.
model_path_xgb = pth.file_path_reader("XGBoost model list")
model_path_mlp = pth.file_path_reader("MLP model list")
with open(model_path_xgb,"rb") as file:
    ls_models_xgb = pickle.load(file)
with open(model_path_mlp,"rb") as file:
    ls_models_mlp = pickle.load(file)

## Get prediction leadtime from user. The question is repeated until the
## answer is a non-negative multiple of 5:
pred_dt = -1
while (pred_dt%5!=0 or pred_dt<0):
    try:
        pred_dt = int(raw_input("For which lead time should comparison be made? "))
    except ValueError:
        ## Non-numeric answer: reset to the invalid start value and ask again
        ## instead of aborting the script with a traceback.
        print("  Lead time must be a non-negative multiple of 5")
        pred_dt = -1

## Get features of largest models (ANN and XGB)
several times the same DATE_TRT_ID), the overlap in this dimension is deleted in one dataset! Merging -> Merging two training datasets with the same dimensions but different variables (e.g. append newly created NWC-SAF statistics). Since dimensions must agree, it is necessary to define the type of join (inner -> suggested, outer, ..) which should be performed. In case of an inner join, only the dimension ranges are kept which occur in both datasets. """ print(print_text) combi_type = None while (combi_type != "m" and combi_type != "c"): combi_type = raw_input("Merge or Concatenation? [m/c] ") ## 1) Reading the paths to the files: print_title("Paths to the respective xarray datasets:") path_str_1 = pth.file_path_reader(1) path_str_2 = pth.file_path_reader(2) ## 2) Reading the files: print_title("Loading the xarray datasets:") xr_1 = rxr.xarray_file_loader(path_str_1) xr_2 = rxr.xarray_file_loader(path_str_2) print(" Finished loading the datasets") ## 3) Compare dimensions of the datasets: print_title("Comparing the dimensions of the datasets:") unequal_dimensions = [] #print("Compare dimensions of the two datasets:") for item in xr_1.dims: if xr_1[item].equals(xr_2[item]): print(" Dimension %s equal" % item)
# coding: utf-8
import matplotlib.pylab as plt
import matplotlib.colors as mcolors
import numpy as np
import xarray as xr
import os

import coalition3.inout.paths as pth
import coalition3.inout.readxr as rxr
import coalition3.operational.statistics as stat
from coalition3.visualisation.TRTcells import contour_of_2dHist


def _rank_hist2d(ax, rank_coal3):
    """Plot one log-scaled 2D histogram panel on axis *ax*.

    x values are the jittered original TRT Ranks (Rank_TRT_rand), y values
    are the flattened values of *rank_coal3* selected at time_delta=0.
    Returns whatever Axes.hist2d returns.
    """
    rank_t0 = rank_coal3.sel(time_delta=0).values.flatten()
    ## A fresh LogNorm instance is created for every call, exactly as the
    ## individual inline calls did.
    return ax.hist2d(Rank_TRT_rand.values, rank_t0,
                     bins=150, range=[[0,4],[0,4]],
                     norm=mcolors.LogNorm(), cmap="magma")


## Load the xarray training dataset:
path_to_xarray = pth.file_path_reader("xarray training dataset")
xr_new_TRT = rxr.xarray_file_loader(path_to_xarray)
#xr_new_TRT = xr.open_mfdataset("Combined_stat_pixcount.nc")

## Original TRT Rank (RANKr divided by ten) and a copy with uniform random
## noise in [-0.1, 0.1) added to every value:
Rank_TRT = xr_new_TRT["RANKr"]/10.
jitter = np.random.uniform(-0.1,0.1,len(Rank_TRT))
Rank_TRT_rand = Rank_TRT+jitter

## COAL3 TRT Rank as stored in the dataset, and recalculated with the
## "all_median" and "all_max" ET options:
Rank_COAL3_new = xr_new_TRT["TRT_Rank"]
Rank_COAL3_allmed = stat.calc_TRT_Rank(xr_new_TRT,ET_option="all_median")["TRT_Rank"]
Rank_COAL3_allmax = stat.calc_TRT_Rank(xr_new_TRT,ET_option="all_max")["TRT_Rank"]

## One panel per COAL3 rank variant:
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[15,4.2])
panel_labels = ['TRT Rank (COAL3 - Cond. Median ET45)',
                'TRT Rank (COAL3 - Median ET45)',
                'TRT Rank (COAL3 - Max ET45)']
for panel, panel_label in zip(axes, panel_labels):
    panel.set_ylabel(panel_label)

hist2d_1 = _rank_hist2d(axes[0], Rank_COAL3_new)
#fig.colorbar(hist2d_1[3], ax=axes[0], extend='max')
hist2d_2 = _rank_hist2d(axes[1], Rank_COAL3_allmed)
import coalition3.statlearn.fitting as fit
import coalition3.statlearn.feature as feat
import coalition3.statlearn.inputprep as ipt
import coalition3.statlearn.modeleval as mev

## Uncomment when running on Mac OS:
#os.environ['KMP_DUPLICATE_LIB_OK']='True'

## ============================================================================
## Read the configuration (only the first of the three values returned by
## the operational config reader is kept):
cfg_tds = cfg.get_config_info_tds()
cfg_op, __, __ = cfg.get_config_info_op()

## Path to the pandas training dataframe. A single command line argument,
## if given, is forwarded to the path reader; otherwise None is passed:
if len(sys.argv) == 2:
    user_argv_path = sys.argv[1]
else:
    user_argv_path = None
path_to_df = pth.file_path_reader("pandas training dataframe (nonnan)", user_argv_path)
model_path = pth.file_path_reader("XGBoost model saving location")

print("\nLoading nonnan dataframe into RAM")
df_nonnan = pd.read_hdf(path_to_df, key="df_nonnan")

## Keep only the rows whose TRT Rank at t0 is at least 0.15, then drop the
## reference to the full dataframe:
print("\nRemove rows where TRT Rank (t0) is close to zero")
is_nonzero_t0 = df_nonnan["TRT_Rank|0"] >= 0.15
df_nonnan_nonzerot0 = df_nonnan.loc[is_nonzero_t0]
del df_nonnan, is_nonzero_t0

## Get feature importance for specified time delta:
## List of lead times, obtained from the feature helper using the timestep
## and n_integ entries of the operational config:
ls_pred_dt = feat.get_pred_dt_ls("the feature selection",
                                 cfg_op["timestep"],
                                 cfg_op["n_integ"])

## Get model boundaries:
import coalition3.statlearn.feature as feat import coalition3.statlearn.inputprep as ipt import sklearn.metrics as met from sklearn.neural_network import MLPRegressor from sklearn.model_selection import GridSearchCV ## ============================================================================ ## Get config info: cfg_tds = cfg.get_config_info_tds() cfg_op, __, __ = cfg.get_config_info_op() mod_name = "" ## Open pandas training dataframe: user_argv_path = sys.argv[1] if len(sys.argv) == 2 else None path_to_df = pth.file_path_reader("pandas training dataframe (nonnan)", user_argv_path) model_path = pth.file_path_reader("model saving location") print("\nLoading nonnan dataframe into RAM") df_nonnan = pd.read_hdf(path_to_df, key="df_nonnan") ## Get lead-time from user: ls_pred_dt = feat.get_pred_dt_ls("the ANN fit", cfg_op["timestep"], cfg_op["n_integ"]) ## Loop over time-deltas: for pred_dt in ls_pred_dt: ## Get normalised training and testing data: X_train, X_test, y_train, y_test, scaler = ipt.get_model_input( df_nonnan, del_TRTeqZero_tpred=True, split_Xy_traintest=True,
bbox=props) plt.tight_layout() path_addon_num = "_".join([str(num) for num in list_min_plus]) if len(path_addon)>0: path_addon = "_"+path_addon plt.savefig(os.path.join(cfg_tds["fig_output_path"],"TRT_diff_scatter_%s%s.pdf" % (path_addon_num,path_addon)), orientation="landscape") ## ============================================================================ ## Get config info: cfg_tds = cfg.get_config_info_tds() col10 = '#E69F00' col30 = '#D55E00' ## Open pandas training dataframe: user_argv_path = sys.argv[1] if len(sys.argv)==2 else None path_to_df = pth.file_path_reader("pandas training dataframe",user_argv_path) import_nonnan = False if os.path.exists("%s_nonnan.h5" % os.path.splitext(path_to_df)[0]): import_ans = "" while (import_ans!="y" and import_ans!="n"): import_ans = raw_input(" Dataframe ending '.. _nonnan.h5' already exists, import this one? [y/n] ") if import_ans=="y": df_nonnan = pd.read_hdf("%s_nonnan.h5" % os.path.splitext(path_to_df)[0],"df_nonnan") import_nonnan = True if not import_nonnan: df = pd.read_hdf(path_to_df,key="df") ## Make analysis of how many values are missing per column: df_nan_count_sort = df.isna().sum().sort_values(ascending=False) df_nan_count_sort[:6].plot(drawstyle="steps", linewidth=2)
# coding: utf-8 """ [COALITION3] Import xarray dataset containing statistics and pixel counts, and convert into 2d Pandas dataframe containing the predictive variables (statistics and TRT information) and the target variables (TRT Ranks) """ ## Import packages and define functions: from __future__ import print_function import os import sys import coalition3.inout.paths as pth import coalition3.inout.readxr as rxr import coalition3.operational.convertds as cds ## ============================================================================ print("\n%s\n Converting xarray training dataset to 2D Pandas dataframe\n" % (80 * '-')) print(" Read path to xarray training dataset") user_argv_path = sys.argv[1] if len(sys.argv) == 2 else None path_to_ds = pth.file_path_reader("xarray training dataset", user_argv_path) path_to_df = "%s_df.h5" % (os.path.splitext(path_to_ds)[0]) ## Load xarray dataset: ds = rxr.xarray_file_loader(path_to_ds) ## Convert to pandas dataframe: cds.convert_ds2df(ds, outpath=path_to_df, diff_option=None)