Exemple #1
0
import coalition3.inout.paths as pth
import coalition3.inout.readconfig as cfg
import coalition3.statlearn.fitting as fit
import coalition3.statlearn.inputprep as ipt

## Uncomment when running on Mac OS:
#os.environ['KMP_DUPLICATE_LIB_OK']='True'
    
## ============================================================================
## Read the training-dataset and the operational configuration:
cfg_tds = cfg.get_config_info_tds()
cfg_op, __, __ = cfg.get_config_info_op()

## Load the training dataframe; its path may be handed over as the only
## command line argument, otherwise None is passed to the path reader:
user_argv_path = None
if len(sys.argv) == 2:
    user_argv_path = sys.argv[1]
path_to_df = pth.file_path_reader(
    "pandas training dataframe (nonnan)", user_argv_path
)
print("\nLoading nonnan dataframe into RAM")
df_nonnan = pd.read_hdf(path_to_df, key="df_nonnan")

## Read the paths to the pickled model lists and unpickle both of them:
model_path_xgb = pth.file_path_reader("XGBoost model list")
model_path_mlp = pth.file_path_reader("MLP model list")
with open(model_path_xgb, "rb") as file:
    ls_models_xgb = pickle.load(file)
with open(model_path_mlp, "rb") as file:
    ls_models_mlp = pickle.load(file)

## Get prediction leadtime from the user. The prompt is repeated until the
## answer is a non-negative multiple of 5:
pred_dt = -1
while (pred_dt%5!=0 or pred_dt<0):
    try:
        pred_dt = int(raw_input("For which lead time should comparison be made? "))
    except ValueError:
        ## Answer was not an integer: reset to the invalid start value so that
        ## the loop asks again instead of aborting the script.
        pred_dt = -1

## Get features of largest models (ANN and XGB)
Exemple #2
0
                   several times the same DATE_TRT_ID), the overlap in this dimension is
                   deleted in one dataset!
  Merging       -> Merging two training datasets with the same dimensions but different
                   variables (e.g. append newly created NWC-SAF statistics).
                   Since dimensions must agree, it is necessary to define the type of join
                   (inner -> suggested, outer, ..) which should be performed. In case of an
                   inner join, only the dimension ranges are kept which occur in both datasets.
"""
print(print_text)

## Keep asking until the answer is exactly "m" or "c":
combi_type = None
while combi_type not in ("m", "c"):
    combi_type = raw_input("Merge or Concatenation? [m/c] ")

## 1) Ask for the path of each of the two files:
print_title("Paths to the respective xarray datasets:")
path_str_1 = pth.file_path_reader(1)
path_str_2 = pth.file_path_reader(2)

## 2) Load both files with the project's xarray loader:
print_title("Loading the xarray datasets:")
xr_1 = rxr.xarray_file_loader(path_str_1)
xr_2 = rxr.xarray_file_loader(path_str_2)
print("  Finished loading the datasets")

## 3) Compare dimensions of the datasets:
##    Every entry of xr_1.dims is looked up in both datasets and a message is
##    printed when the two entries are equal.
print_title("Comparing the dimensions of the datasets:")
## NOTE(review): nothing is appended to this list in the loop below -
## presumably the unequal dimensions are collected further down; confirm.
unequal_dimensions = []
#print("Compare dimensions of the two datasets:")
for item in xr_1.dims:
    ## NOTE(review): xr_2[item] presumably fails if the second dataset lacks
    ## this dimension - verify that both datasets share all dimension names.
    if xr_1[item].equals(xr_2[item]):
        print("  Dimension %s equal" % item)
# coding: utf-8
import matplotlib.pylab as plt
import matplotlib.colors as mcolors
import numpy as np
import xarray as xr
import os

import coalition3.inout.paths as pth
import coalition3.inout.readxr as rxr
import coalition3.operational.statistics as stat
from coalition3.visualisation.TRTcells import contour_of_2dHist


## Load the xarray training dataset from the path given by the user:
path_to_xarray = pth.file_path_reader("xarray training dataset")
xr_new_TRT = rxr.xarray_file_loader(path_to_xarray)
#xr_new_TRT = xr.open_mfdataset("Combined_stat_pixcount.nc")

## "RANKr" divided by ten, plus a copy with uniform random noise in
## [-0.1, 0.1) added to every element:
Rank_TRT = xr_new_TRT["RANKr"] / 10.
rank_jitter = np.random.uniform(-0.1, 0.1, len(Rank_TRT))
Rank_TRT_rand = Rank_TRT + rank_jitter

## "TRT_Rank" as stored in the dataset, and as recalculated with the
## ET options "all_median" and "all_max":
Rank_COAL3_new = xr_new_TRT["TRT_Rank"]
Rank_COAL3_allmed = stat.calc_TRT_Rank(
    xr_new_TRT, ET_option="all_median"
)["TRT_Rank"]
Rank_COAL3_allmax = stat.calc_TRT_Rank(
    xr_new_TRT, ET_option="all_max"
)["TRT_Rank"]

## Figure with three panels side by side, one y-axis label per panel:
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[15,4.2])
_ylabels = (
    'TRT Rank (COAL3 - Cond. Median ET45)',
    'TRT Rank (COAL3 - Median ET45)',
    'TRT Rank (COAL3 - Max ET45)',
)
for _ax, _ylabel in zip(axes, _ylabels):
    _ax.set_ylabel(_ylabel)

## 2D histograms of the noise-added rank against the COAL3 ranks at
## time_delta=0 (each call gets its own LogNorm instance):
hist2d_1 = axes[0].hist2d(
    Rank_TRT_rand.values,
    Rank_COAL3_new.sel(time_delta=0).values.flatten(),
    bins=150,
    range=[[0,4],[0,4]],
    norm=mcolors.LogNorm(),
    cmap="magma",
)
#fig.colorbar(hist2d_1[3], ax=axes[0], extend='max')
hist2d_2 = axes[1].hist2d(
    Rank_TRT_rand.values,
    Rank_COAL3_allmed.sel(time_delta=0).values.flatten(),
    bins=150,
    range=[[0,4],[0,4]],
    norm=mcolors.LogNorm(),
    cmap="magma",
)
import coalition3.statlearn.fitting as fit
import coalition3.statlearn.feature as feat
import coalition3.statlearn.inputprep as ipt
import coalition3.statlearn.modeleval as mev

## Uncomment when running on Mac OS:
#os.environ['KMP_DUPLICATE_LIB_OK']='True'

## ============================================================================
## Read the training-dataset and the operational configuration:
cfg_tds = cfg.get_config_info_tds()
cfg_op, __, __ = cfg.get_config_info_op()

## Read the path to the training dataframe (optionally handed over as the
## only command line argument) and the model saving location, then load
## the dataframe:
user_argv_path = None
if len(sys.argv) == 2:
    user_argv_path = sys.argv[1]
path_to_df = pth.file_path_reader(
    "pandas training dataframe (nonnan)", user_argv_path
)
model_path = pth.file_path_reader("XGBoost model saving location")
print("\nLoading nonnan dataframe into RAM")
df_nonnan = pd.read_hdf(path_to_df, key="df_nonnan")

## Keep only the rows whose "TRT_Rank|0" value is at least 0.15 and drop
## the name of the unfiltered dataframe afterwards:
print("\nRemove rows where TRT Rank (t0) is close to zero")
df_nonnan_nonzerot0 = df_nonnan.loc[df_nonnan["TRT_Rank|0"] >= 0.15]
del df_nonnan

## Get feature importance for specified time delta:
## Get the list of lead times from the configured timestep and number of
## integration steps:
ls_pred_dt = feat.get_pred_dt_ls(
    "the feature selection", cfg_op["timestep"], cfg_op["n_integ"]
)

## Get model boundaries:
Exemple #5
0
import coalition3.statlearn.feature as feat
import coalition3.statlearn.inputprep as ipt

import sklearn.metrics as met
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV

## ============================================================================
## Read the training-dataset and the operational configuration:
cfg_tds = cfg.get_config_info_tds()
cfg_op, __, __ = cfg.get_config_info_op()
mod_name = ""

## Read the path to the training dataframe (optionally handed over as the
## only command line argument) and the model saving location, then load
## the dataframe:
user_argv_path = None
if len(sys.argv) == 2:
    user_argv_path = sys.argv[1]
path_to_df = pth.file_path_reader(
    "pandas training dataframe (nonnan)", user_argv_path
)
model_path = pth.file_path_reader("model saving location")
print("\nLoading nonnan dataframe into RAM")
df_nonnan = pd.read_hdf(path_to_df, key="df_nonnan")

## Get the list of lead times from the configured timestep and number of
## integration steps:
ls_pred_dt = feat.get_pred_dt_ls(
    "the ANN fit", cfg_op["timestep"], cfg_op["n_integ"]
)

## Loop over time-deltas:
for pred_dt in ls_pred_dt:
    ## Get normalised training and testing data:
    X_train, X_test, y_train, y_test, scaler = ipt.get_model_input(
        df_nonnan,
        del_TRTeqZero_tpred=True,
        split_Xy_traintest=True,
Exemple #6
0
                        bbox=props)
                        
    plt.tight_layout()
    path_addon_num = "_".join([str(num) for num in list_min_plus])
    if len(path_addon)>0: path_addon = "_"+path_addon
    plt.savefig(os.path.join(cfg_tds["fig_output_path"],"TRT_diff_scatter_%s%s.pdf" % (path_addon_num,path_addon)), orientation="landscape")

## ============================================================================
## Get config info:
cfg_tds = cfg.get_config_info_tds()
col10 = '#E69F00'
col30 = '#D55E00'

## Open pandas training dataframe:
user_argv_path = sys.argv[1] if len(sys.argv)==2 else None
path_to_df = pth.file_path_reader("pandas training dataframe",user_argv_path)

## If a file with the same base name and the ending '_nonnan.h5' exists, let
## the user decide whether that one should be imported instead:
path_to_df_nonnan = "%s_nonnan.h5" % os.path.splitext(path_to_df)[0]
import_nonnan = False
if os.path.exists(path_to_df_nonnan):
    import_ans = ""
    while (import_ans!="y" and import_ans!="n"):
        import_ans = raw_input("  Dataframe ending '.. _nonnan.h5' already exists, import this one? [y/n] ")
    if import_ans=="y":
        df_nonnan = pd.read_hdf(path_to_df_nonnan,"df_nonnan")
        import_nonnan = True
if not import_nonnan:
    df = pd.read_hdf(path_to_df,key="df")

    ## Make analysis of how many values are missing per column.
    ## This needs `df`, which is only loaded in this branch; running it
    ## unconditionally raised a NameError whenever the '_nonnan.h5'
    ## dataframe had been imported instead.
    df_nan_count_sort = df.isna().sum().sort_values(ascending=False)
    df_nan_count_sort[:6].plot(drawstyle="steps", linewidth=2)
Exemple #7
0
# coding: utf-8
""" [COALITION3] Import xarray dataset containing statistics and
    pixel counts, and convert into 2d Pandas dataframe containing
    the predictive variables (statistics and TRT information)
    and the target variables (TRT Ranks) """

## Import packages and define functions:
from __future__ import print_function

import os
import sys

import coalition3.inout.paths as pth
import coalition3.inout.readxr as rxr
import coalition3.operational.convertds as cds

## ============================================================================
print("\n%s\n Converting xarray training dataset to 2D Pandas dataframe\n" %
      (80 * '-'))

print("  Read path to xarray training dataset")
user_argv_path = sys.argv[1] if len(sys.argv) == 2 else None
path_to_ds = pth.file_path_reader("xarray training dataset", user_argv_path)
path_to_df = "%s_df.h5" % (os.path.splitext(path_to_ds)[0])

## Load xarray dataset:
ds = rxr.xarray_file_loader(path_to_ds)

## Convert to pandas dataframe:
cds.convert_ds2df(ds, outpath=path_to_df, diff_option=None)