def setUp(self):
    """Create the predictand, precursors and forecast objects for a test.

    The predictand is configured from the cluster ini-file and the
    precursors from the composites ini-file. Both write to the same test
    output directory with the label ``TEST``. Afterwards the clustering
    parameters are stored on the instance, the ``ForecastNN`` object is
    created and ``initialize_data`` is called.
    """
    tests_dir = "/home/sonja/Documents/Clustering-Forecast/tests/"
    label = "TEST"

    # predictand (cluster) set-up
    cluster_ini = "/home/sonja/Documents/Clustering-Forecast/ini/clusters_America_prec_t_test.ini"
    cluster_config = Config("Test.log")
    self.predictand = Predictand(
        cluster_ini,
        tests_dir,
        label,
        cluster_config.config_dict,
    )

    # precursors (composite) set-up; a separate Config object is created on purpose
    # to keep the same construction sequence as for the predictand
    composite_ini = "/home/sonja/Documents/Clustering-Forecast/ini/composites_America_PSL.ini"
    composite_config = Config("Test.log")
    self.precursors = Precursors(
        composite_ini,
        tests_dir,
        label,
        composite_config.config_dict,
    )

    # parameters of the cluster method
    self.method_name = "ward"
    self.k = 2
    self.predictand_var = "prec_t"

    # forecast object
    # NOTE(review): called with four arguments here - confirm against the
    # ForecastNN constructor signature.
    self.forecast_nn = ForecastNN(
        cluster_ini,
        cluster_config.config_dict,
        self.k,
        self.method_name,
    )

    self.initialize_data()
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Prepare the forecast data and define the objective for NN tuning.

    Reads ``inifile``, ``outputlabel``, ``outputpath`` and ``datarange`` from
    ``cl_parser.arguments``, creates the predictand, the ``ForecastNN`` object
    (``ward`` linkage, ``k = 5``) and the precursors, splits the data into a
    train and a test part and calculates clusters and composites from the
    training part. The nested ``objective`` function trains and evaluates one
    hyper-parameter setting.

    :param cl_parser: parser whose ``arguments`` mapping holds the run options
    :param cl_config: configuration dictionary passed on to the project classes
    """
    logger.info("Start forecast_nn model")

    # the complete ini-file is given, so no predictand-specific lookup is needed
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Create train and test dataset
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(
        y_train, forecast_nn.method_name, forecast_nn.k)

    # Calculate composites
    precursors.get_composites_data_1d_train_test(
        X_train, predictand.f, forecast_nn.k, forecast_nn.method_name,
        predictand.var)

    nr_epochs = 500
    forecast_nn.list_precursors = ["Z500"]
    forecast_predictands = forecast_nn.list_precursors
    # running row index of the results written by ``objective``
    index_df = 0

    # NOTE(review): ``objective`` is defined but not invoked inside this
    # function body - confirm how it is meant to be driven.
    def objective(opt_m, nr_batch_size, lr_rate, nr_layers, nr_neurons):
        """Train the NN with one parameter set and evaluate it on the test data.

        :param opt_m: not used; the optimizer is fixed to ``"Adam"`` below
        :param nr_batch_size: batch size passed to ``train_nn_opt``
        :param lr_rate: learning rate passed to ``train_nn_opt``
        :param nr_layers: number of layers passed to ``train_nn_opt``
        :param nr_neurons: number of neurons passed to ``train_nn_opt``
        :return: time correlation as returned by
            ``calculate_time_correlation_all_times``
        """
        # ``index_df`` belongs to ``main``; without this declaration the
        # increment below would make it local and reading it would raise
        # UnboundLocalError.
        nonlocal index_df

        opt_method = "Adam"

        # train small NN
        forecast_nn.train_nn_opt(
            forecast_nn.list_precursors, predictand.clusters,
            precursors.dict_composites, X_train, y_train[f"{predictand.var}"],
            nr_neurons, opt_method, nr_epochs, nr_layers, lr_rate,
            nr_batch_size)

        # Calculate forecast_nn for all time steps of the test data
        pattern_corr_values = []
        forecast_data = np.zeros(
            (len(y_test[f"{predictand.var}"]),
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        logger.info(forecast_predictands)

        for year in range(len(y_test[predictand.var])):
            print(year)
            forecast_temp = forecast_nn.prediction_nn(
                forecast_nn.list_precursors_all, predictand.clusters,
                precursors.dict_composites, X_test, year)

            # Assign forecast_nn data to array
            forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            pattern_corr_values.append(
                stats.pearsonr(forecast_temp,
                               y_test[f"{predictand.var}"][year])[0])

        # Calculate time correlation for each point
        time_correlation, significance = forecast_nn.calculate_time_correlation_all_times(
            np.array(y_test[f"{predictand.var}"]), forecast_data)

        # Reshape correlation maps to the two trailing dimensions of the predictand field
        map_shape = (predictand.dict_predict[predictand.var].shape[1],
                     predictand.dict_predict[predictand.var].shape[2])
        pred_t_corr_reshape = np.reshape(time_correlation, map_shape)
        significance_corr_reshape = np.reshape(significance, map_shape)

        logger.info(f'time correlation: {np.nanmean(pred_t_corr_reshape)}')
        logger.info(f'pattern correlation: {np.nanmean(pattern_corr_values)}')

        logger.info("Plot and save variables")
        ex = ExportVarPlot(output_label, cl_config)
        ex.save_plot_and_time_correlationNN(
            forecast_nn.list_precursors, predictand, pred_t_corr_reshape,
            significance_corr_reshape, forecast_nn.list_precursors_all,
            np.nanmean(pred_t_corr_reshape), nr_neurons, opt_method,
            nr_epochs, nr_layers, lr_rate, nr_batch_size)

        df_parameters_opt = pd.DataFrame(
            {
                "precursor": ex.predictor_names,
                "nr_neurons": nr_neurons,
                "opt_method": opt_method,
                "nr_epochs": nr_epochs,
                "nr_layers": nr_layers,
                "lr_rate": lr_rate,
                "nr_batch_size": nr_batch_size,
                "time_correlation": np.nanmean(pred_t_corr_reshape),
                "pattern_correlation": np.nanmean(pattern_corr_values),
            },
            index=[index_df])

        # NOTE(review): this path is relative and does not use output_path - confirm.
        filename = f'output-{output_label}/skill_correlation-{predictand.var}-opt-sim.csv'
        with open(filename, 'a') as f:
            # write the header only when the file is still empty
            df_parameters_opt.to_csv(f, header=f.tell() == 0)

        index_df += 1
        return time_correlation
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Train and evaluate the NN forecast for every precursor combination.

    Reads ``inifile``, ``outputlabel``, ``outputpath`` and ``datarange`` from
    ``cl_parser.arguments``, creates the predictand, the ``ForecastNN`` object
    (``ward`` linkage, ``k = 5``) and the precursors, splits the data into a
    train and a test part and calculates clusters and composites from the
    training part. For each entry of ``forecast_nn.list_precursors_combinations``
    the network is trained, a forecast is made for every test time step and
    the time and pattern correlations are logged. If ``forecast_nn.plot`` is
    set, the correlation maps are exported and the mean skills are written to
    a JSON file.

    :param cl_parser: parser whose ``arguments`` mapping holds the run options
    :param cl_config: configuration dictionary passed on to the project classes
    """
    logger.info("Start forecast_nn model opt")

    # the complete ini-file is given, so no predictand-specific lookup is needed
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)
    dict_skills_pattern = {}

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Create train and test dataset
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(
        y_train, forecast_nn.method_name, forecast_nn.k)

    # Calculate composites
    precursors.get_composites_data_1d_train_test(
        X_train, predictand.f, forecast_nn.k, forecast_nn.method_name,
        predictand.var)

    for forecast_predictands in forecast_nn.list_precursors_combinations:
        forecast_nn.list_precursors = forecast_predictands

        # train small NN
        forecast_nn.train_nn(
            forecast_nn.list_precursors, predictand.clusters,
            precursors.dict_composites, X_train, y_train[f"{predictand.var}"])

        # Calculate forecast_nn for all time steps of the test data
        pattern_corr_values = []
        forecast_data = np.zeros(
            (len(y_test[f"{predictand.var}"]),
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        logger.info(forecast_predictands)

        for year in range(len(y_test[predictand.var])):
            print(year)
            forecast_temp = forecast_nn.prediction_nn_model(
                forecast_nn.list_precursors_all, predictand.clusters,
                precursors.dict_composites, X_test, year)

            # Assign forecast_nn data to array
            forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            pattern_corr_values.append(
                stats.pearsonr(forecast_temp,
                               y_test[f"{predictand.var}"][year])[0])

        # Calculate time correlation for each point
        time_correlation, significance = forecast_nn.calculate_time_correlation_all_times(
            np.array(y_test[f"{predictand.var}"]), forecast_data)

        # Reshape correlation maps to the two trailing dimensions of the predictand field
        map_shape = (predictand.dict_predict[predictand.var].shape[1],
                     predictand.dict_predict[predictand.var].shape[2])
        pred_t_corr_reshape = np.reshape(time_correlation, map_shape)
        significance_corr_reshape = np.reshape(significance, map_shape)

        logger.info(f'time correlation: {np.nanmean(pred_t_corr_reshape)}')
        logger.info(f'pattern correlation: {np.nanmean(pattern_corr_values)}')

        # Plot correlation map, if specified in ini-file
        if forecast_nn.plot:
            logger.info("Plot and save variables")
            ex = ExportVarPlot(output_label, cl_config)
            ex.save_plot_and_time_correlation(
                forecast_nn.list_precursors, predictand, pred_t_corr_reshape,
                significance_corr_reshape, forecast_nn.list_precursors_all,
                np.nanmean(pred_t_corr_reshape))

            # ``ex`` only exists when plotting is enabled, so the skill entry
            # (keyed by the exporter's predictor names) is recorded here
            dict_skills_pattern[ex.predictor_names] = {
                'time correlation': np.nanmean(pred_t_corr_reshape),
                'pattern correlation': np.nanmean(pattern_corr_values)
            }

    if forecast_nn.plot:
        with open(
                f'{output_path}/output-{output_label}/skill_correlation-{predictand.var}.json',
                'w') as fp:
            json.dump(dict_skills_pattern, fp)
class TestForecastNN(unittest.TestCase):
    """Test fixture for ForecastNN based on toy-model data.

    ``setUp`` creates the predictand, precursors and forecast objects and
    replaces their input data with samples drawn from a mixture of Gaussians
    ("snow" and "ice" precursors) and a toy predictand derived from them.
    The remaining methods are helpers that split the data, calculate clusters
    and composites and run the forecast.
    """

    def setUp(self):
        """Initialize predictand (clusters), precursors (composites) and forecast."""
        # cluster
        cl_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/clusters_America_prec_t_test.ini"
        cl_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        cl_output_label = "TEST"
        cl_config = Config("Test.log")
        self.predictand = Predictand(cl_inifile, cl_output_path,
                                     cl_output_label, cl_config.config_dict)

        # composite
        co_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/composites_America_PSL.ini"
        co_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        co_output_label = "TEST"
        co_config = Config("Test.log")
        self.precursors = Precursors(co_inifile, co_output_path,
                                     co_output_label, co_config.config_dict)

        # set cluster method parameters
        self.method_name = "ward"
        self.k = 2
        self.predictand_var = "prec_t"

        # initialize Forecast class
        # NOTE(review): called with four arguments here - confirm against the
        # ForecastNN constructor signature.
        self.forecast_nn = ForecastNN(cl_inifile, cl_config.config_dict,
                                      self.k, self.method_name)
        self.initialize_data()

    def initialize_data(self):
        """Initialize toy data to test the algorithm.

        Draws ``t_end`` samples from a two-component Gaussian mixture, derives
        the toy predictand from them, stores both in the predictand and
        precursors objects (replacing the "PSL" entries by "snow" and "ice")
        and creates the train/test split.
        """
        # create data for the two different composites
        # first two are snow data and second two data points are ice data
        self.gaussian_distributions = [
            {
                "mean": [-1, 1, 1, -1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
            {
                "mean": [-1, 0, 1, 1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
        ]

        # create time series
        self.t_end = 5000
        self.time_series = range(self.t_end)

        # create instance to get samples for snow and ice
        precursors = MixtureGaussianModel(self.gaussian_distributions)
        # get samples
        self.X = (precursors.rvs(self.t_end))

        # array which leads with composites to clusters of PRCP
        # (builtin float instead of the alias np.float, which was removed from NumPy)
        self.array = np.array(
            [[1, 2, 1, 1], [-0.5, 0, -0.5, 1.], [-1, 0, -1, -1]], float)
        self.prcp_clusters = [{"cluster": [1, -1, 1]}, {"cluster": [1, 1, -1]}]
        self.prcp = PredictandToyModel(self.prcp_clusters, self.array)
        self.y = self.prcp.get_data_from_precursors(self.X)

        # set data to predictand input arrays
        self.predictand.dict_standardized_pred_1D[self.predictand.var] = self.y
        self.predictand.dict_pred_1D[self.predictand.var] = self.y

        # set data to precursors input data
        self.precursors.dict_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_standardized_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_prec_1D["snow"] = self.X[:, :2]
        self.precursors.dict_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_standardized_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_prec_1D["ice"] = self.X[:, 2:]

        self.precursors.dict_standardized_precursors.pop("PSL")
        self.precursors.dict_prec_1D.pop("PSL")
        self.precursors.dict_precursors.pop("PSL")

        # Create train and test dataset with an 66:33 split
        self.y_train, self.X_train, self.y_test, self.X_test = self.train_test_split_pred(
            self.predictand, self.precursors, test_size=0.66, random_state=2019)

    @staticmethod
    def train_test_split_pred(predictand, precursors, test_size=0.66, random_state=2019):
        """Split predictand and precursor data randomly into train and test parts.

        ``int(len * test_size)`` time steps are drawn without replacement.
        Despite the parameter name, the drawn time steps form the *training*
        data and the remaining ones the test data. The original time order is
        kept within both parts.

        :param predictand: object providing ``var`` and ``dict_pred_1D``
        :param precursors: object providing ``dict_precursors`` and ``dict_prec_1D``
        :param test_size: fraction of time steps that is drawn
        :param random_state: seed for ``np.random``
        :return: tuple ``(y_train, X_train, y_test, X_test)`` of dictionaries of
            lists; a key is only present if at least one time step fell into
            that part
        """
        np.random.seed(random_state)
        len_predicts = len(predictand.dict_pred_1D[predictand.var])
        len_test_data = int(len_predicts * test_size)
        # a set gives constant-time membership tests in the loop below
        selected_time_steps = set(
            np.random.choice(len_predicts, len_test_data,
                             replace=False).tolist())
        y_train = {}
        # noinspection PyPep8Naming
        X_train = {}
        y_test = {}
        # noinspection PyPep8Naming
        X_test = {}

        for i in range(len_predicts):
            if i in selected_time_steps:
                y_train.setdefault(predictand.var, []).append(
                    predictand.dict_pred_1D[predictand.var][i])
                for prec in precursors.dict_precursors.keys():
                    X_train.setdefault(prec, []).append(
                        precursors.dict_prec_1D[prec][i])
            else:
                y_test.setdefault(predictand.var, []).append(
                    predictand.dict_pred_1D[predictand.var][i])
                for prec in precursors.dict_precursors.keys():
                    X_test.setdefault(prec, []).append(
                        precursors.dict_prec_1D[prec][i])
        return y_train, X_train, y_test, X_test

    def calculate_clusters_and_composites(self):
        """Calculate predictand clusters and precursor composites from the training data."""
        # Calculate clusters of the predictand
        self.calculate_clusters_from_test_data(self.y_train, self.method_name,
                                               self.k)
        # Calculate composites
        self.precursors.get_composites_data_1d_train_test(
            self.X_train, self.predictand.f, self.k, self.method_name,
            self.predictand_var)

    def calculate_forecast(self):
        """Calculate forecast_nn using toy model data.

        Trains the network on the training data with both precursors, stores
        the prediction for every test time step in ``self.forecast_data`` and
        the rounded pattern correlations in ``self.pattern_corr_values``.
        Finally the test data and the forecast are rounded in place.
        """
        self.calculate_clusters_and_composites()
        self.forecast_nn.list_precursors_all = ["snow", "ice"]
        self.forecast_nn.list_precursors_combinations = [["snow"], ["ice"],
                                                         ["snow", "ice"]]

        # for this test purpose we take both precursors
        # train model using training data
        self.forecast_nn.train_nn(self.forecast_nn.list_precursors_all,
                                  self.predictand.clusters,
                                  self.precursors.dict_composites,
                                  self.X_train,
                                  self.y_train[self.predictand_var])

        self.forecast_data = np.zeros(
            (len(self.y_test[self.predictand.var]),
             self.predictand.dict_pred_1D[f"{self.predictand.var}"].shape[1]))
        # Calculate forecast_nn for all time steps of the test data
        self.pattern_corr_values = []

        # Prediction
        for year in range(len(self.y_test[self.predictand.var])):
            forecast_temp = self.forecast_nn.prediction_nn(
                self.forecast_nn.list_precursors_all,
                self.predictand.clusters, self.precursors.dict_composites,
                self.X_test, year)

            # Assign forecast_nn data to array
            self.forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            self.pattern_corr_values.append(
                round(
                    stats.pearsonr(self.forecast_data[year],
                                   self.y_test[self.predictand.var][year])[0]))

        # Round data for correlation analysis
        for j in range(len(self.y_test[self.predictand.var])):
            for i in range(len(self.y_test[self.predictand.var][j])):
                self.y_test[self.predictand.var][j][i] = round(
                    self.y_test[self.predictand.var][j][i])
                self.forecast_data[j][i] = round(self.forecast_data[j][i])

    def calculate_clusters_from_test_data(self, train_data: dict,
                                          method_name: str, k: int):
        """Calculate clusters for the predictand variable.

        :param train_data: cluster data which should be used to calculate clusters
        :param method_name: name of the method used for clustering
        :param k: number of clusters
        """
        print('Calculate clusters')
        self.predictand.dict_standardized_pred_1D = train_data
        self.predictand._set_method_name(method_name)
        self.predictand._set_k(k)
        self.predictand._set_linkage()
        self.predictand._set_f()
        self.predictand._cluster_frequency()
        self.predictand._set_clusters_1d()
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Run a talos parameter scan for the clustering-based NN forecast.

    Reads ``inifile``, ``outputlabel``, ``outputpath``, ``datarange`` and
    ``forecast_precursors`` from ``cl_parser.arguments``, creates the
    predictand, the precursors and the ``ForecastNN`` object (``ward``
    linkage, ``k = 5``), splits the data into a train and a test part and
    calculates clusters and composites. The alpha values and pseudo targets
    returned by ``calc_alphas_for_talos`` are then handed to ``talos.Scan``
    together with the parameter space ``p``.

    :param cl_parser: parser whose ``arguments`` mapping holds the run options
    :param cl_config: configuration dictionary passed on to the project classes
    """
    logger.info("Start forecast_nn model")

    # the complete ini-file is given, so no predictand-specific lookup is needed
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5

    # The class ForecastNN (and with it tensorflow) cannot be imported at the
    # beginning of the module, because the netcdf load function of xarray
    # does not work afterwards; therefore it is imported only here.
    from classes.ForecastNN import ForecastNN
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # Create train and test dataset
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(
        y_train, forecast_nn.method_name, forecast_nn.k)

    # Calculate composites
    precursors.get_composites_data_1d_train_test(
        X_train, X_test, predictand.f, forecast_nn.k, forecast_nn.method_name,
        predictand.var)

    forecast_precursors = cl_parser.arguments['forecast_precursors']
    logger.info(forecast_precursors)
    forecast_nn.list_precursors = forecast_precursors

    dict_calc_X_y = {
        'composites_1d': precursors.dict_composites,
        'forecast_predictands': forecast_nn.list_precursors,
        'clusters_1d': predictand.clusters,
    }

    # Calculate pseudo-values, meaning that y contains at element 0 the time
    # step of the correct forecast variable. It cannot be done differently,
    # because the y-train values must have the same dimension as the output
    # we would like to get (beta-values); the comparison can be done
    # differently.
    alphas_train, alphas_val, y_train_pseudo, y_val_pseudo = forecast_nn.calc_alphas_for_talos(
        X_train, y_train[predictand.var], dict_calc_X_y)
    len_alpha = len(alphas_train)

    # set the parameter space boundary
    p = {
        'lr': [0.0001],
        'activation': ['relu'],
        'kernel_initializer': ['random_uniform'],
        'optimizer': ['Adam'],
        'losses': ['logcosh'],
        'shapes': ['brick'],
        'first_neuron': [5],
        'forecast_predictands': [forecast_nn.list_precursors],
        'len_alpha': [len_alpha],
        'hidden_layers': [3],
        'dropout': [.1],
        'batch_size': [64],
        'epochs': [95, 105],
        'last_activation': ['linear'],
        'y_train': [y_train[predictand.var]],
        'x_test': [X_test],
        'y_test': [y_test[predictand.var]],
        'composites_1d': [precursors.dict_composites],
        'precursor': [precursors.var],
        'pattern_corr': [1],
        'time_corr': [1],
        'taylor_skill': [0],
    }

    # talos is imported here, after the data has been loaded
    import talos as ta
    ta.Scan(x=alphas_train,
            y=y_train_pseudo,
            x_val=alphas_val,
            y_val=y_val_pseudo,
            model=forecast_nn.train_nn_talos,
            params=p,
            experiment_name='opt-nn-clustering')