def setUp(self):
    """Create the predictand, precursor and forecast fixtures, then load toy data."""
    # Clustering parameters used by the fixtures and by later helper methods.
    self.method_name = "ward"
    self.k = 2
    self.predictand_var = "prec_t"

    # Predictand (cluster) fixture.
    cluster_ini = "/home/sonja/Documents/Clustering-Forecast/ini/clusters_America_prec_t_test.ini"
    cluster_out_dir = "/home/sonja/Documents/Clustering-Forecast/tests/"
    cluster_label = "TEST"
    cluster_config = Config("Test.log")
    self.predictand = Predictand(
        cluster_ini,
        cluster_out_dir,
        cluster_label,
        cluster_config.config_dict,
    )

    # Precursor (composite) fixture; it gets its own Config instance.
    composite_ini = "/home/sonja/Documents/Clustering-Forecast/ini/composites_America_PSL.ini"
    composite_out_dir = "/home/sonja/Documents/Clustering-Forecast/tests/"
    composite_label = "TEST"
    composite_config = Config("Test.log")
    self.precursors = Precursors(
        composite_ini,
        composite_out_dir,
        composite_label,
        composite_config.config_dict,
    )

    # Forecast model under test, configured from the cluster ini-file.
    self.forecast_nn = ForecastNN(
        cluster_ini,
        cluster_config.config_dict,
        self.k,
        self.method_name,
    )

    # Replace the data loaded from disk with the synthetic toy data set.
    self.initialize_data()
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Cluster the predictand on the training split and plot precursor composites.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel`` and ``outputpath``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast_nn model")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with five clusters.
    k = 5
    method_name = 'ward'
    forecast = Forecast(inifile, cl_config, k, method_name)
    cluster_message = "Clusters: " + str(forecast.k)
    logger.info(cluster_message)

    # Zero-filled array with (end_year - beg_year) rows and as many columns
    # as the second dimension of the 1-D predictand data. It is allocated
    # here but not filled within this function.
    n_rows = int(forecast.end_year) - int(forecast.beg_year)
    n_cols = predictand.dict_pred_1D[f"{predictand.var}"].shape[1]
    forecast_data = np.zeros((n_rows, n_cols))
    pattern_corr_values = []

    precursors = Precursors(inifile, output_path, output_label, cl_config)
    all_precs_names = list(precursors.dict_precursors.keys())

    # NOTE(review): the original comment announces a 66:33 train/test split
    # while 0.66 is passed as ``test_size`` - confirm which side receives
    # the larger share inside train_test_split_pred.
    # noinspection PyPep8Naming
    split = train_test_split_pred(predictand,
                                  precursors,
                                  test_size=0.66,
                                  random_state=2019)
    y_train, X_train, y_test, X_test = split

    # Cluster the predictand using the training samples only.
    predictand.calculate_clusters_from_test_data(
        y_train,
        forecast.method_name,
        forecast.k,
    )

    # Build the precursor composites for those clusters and plot them.
    precursors.get_composites_data_1d_train_test(
        X_train,
        predictand.f,
        forecast.k,
        forecast.method_name,
        predictand.var,
    )
    precursors.plot_composites(k)
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Load predictand, forecast settings and precursors for the forecast model.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputpath`` and ``outputlabel``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast_nn model")

    # Command-line settings.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_path = arguments['outputpath']
    output_label = arguments['outputlabel']

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with five clusters.
    k = 5
    method_name = 'ward'
    forecast = Forecast(inifile, cl_config, k, method_name)
    cluster_message = "Clusters: " + str(forecast.k)
    logger.info(cluster_message)

    # Zero-filled array with (end_year - beg_year) rows and as many columns
    # as the second dimension of the 1-D predictand data; not filled here.
    n_rows = int(forecast.end_year) - int(forecast.beg_year)
    n_cols = predictand.dict_pred_1D[f"{predictand.var}"].shape[1]
    forecast_data = np.zeros((n_rows, n_cols))
    pattern_corr_values = []

    # NOTE(review): Precursors receives three arguments here (no output
    # path) whereas Predictand above receives four - confirm this matches
    # the Precursors constructor in use.
    precursors = Precursors(inifile, output_label, cl_config)
    all_precs_names = list(precursors.dict_precursors.keys())
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Prepare clusters and composites and define the NN tuning objective.

    The function loads the predictand and the precursors, splits them into
    training and test data, clusters the predictand on the training data,
    computes the precursor composites and finally defines ``objective``,
    which trains a network for one hyper-parameter set and scores it on the
    test data.

    NOTE(review): nothing in this function calls ``objective`` - confirm
    how it is meant to be handed to an optimiser.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel``, ``outputpath`` and
        ``datarange``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast_nn model")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']
    data_range = arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with five clusters.
    method_name = 'ward'
    k = 5
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Split predictand and precursors into training and test data as
    # described by data_range.
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Cluster the predictand using the training samples only.
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast_nn.method_name,
                                                 forecast_nn.k)

    # Build the precursor composites for those clusters.
    precursors.get_composites_data_1d_train_test(X_train, predictand.f,
                                                 forecast_nn.k,
                                                 forecast_nn.method_name,
                                                 predictand.var)

    nr_epochs = 500
    # Only a single precursor is used for the tuning runs.
    forecast_nn.list_precursors = ["Z500"]
    forecast_predictands = forecast_nn.list_precursors
    # Row index of the next record appended to the CSV by ``objective``.
    index_df = 0

    def objective(opt_m, nr_batch_size, lr_rate, nr_layers, nr_neurons):
        """Train one network configuration and score it on the test data.

        :param opt_m: currently ignored, the optimiser is fixed to "Adam".
            NOTE(review): presumably meant to select between "SGD" and
            "Adam" - confirm.
        :param nr_batch_size: batch size passed to ``train_nn_opt``
        :param lr_rate: learning rate passed to ``train_nn_opt``
        :param nr_layers: number of layers passed to ``train_nn_opt``
        :param nr_neurons: number of neurons passed to ``train_nn_opt``
        :return: time correlation as returned by
            ``calculate_time_correlation_all_times``
        """
        # ``index_df`` belongs to main(). Without this declaration the
        # ``+= 1`` below would make it local to objective() and reading it
        # for the DataFrame index would raise UnboundLocalError.
        nonlocal index_df

        opt_method = "Adam"
        observations = y_test[f"{predictand.var}"]

        # Train the network on the training data.
        forecast_nn.train_nn_opt(forecast_nn.list_precursors,
                                 predictand.clusters,
                                 precursors.dict_composites, X_train,
                                 y_train[f"{predictand.var}"], nr_neurons,
                                 opt_method, nr_epochs, nr_layers, lr_rate,
                                 nr_batch_size)

        # Predict every test sample and collect its pattern correlation.
        pattern_corr_values = []
        forecast_data = np.zeros(
            (len(observations),
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        logger.info(forecast_predictands)
        for year, observed in enumerate(observations):
            # NOTE(review): training uses list_precursors while prediction
            # uses list_precursors_all - confirm this is intended.
            forecast_temp = forecast_nn.prediction_nn(
                forecast_nn.list_precursors_all, predictand.clusters,
                precursors.dict_composites, X_test, year)
            forecast_data[year] = forecast_temp
            pattern_corr_values.append(
                stats.pearsonr(forecast_temp, observed)[0])

        # Time correlation for each point over all test samples.
        time_correlation, significance = \
            forecast_nn.calculate_time_correlation_all_times(
                np.array(observations), forecast_data)

        # Reshape the flat correlation vectors to the predictand map.
        field = predictand.dict_predict[predictand.var]
        map_shape = (field.shape[1], field.shape[2])
        pred_t_corr_reshape = np.reshape(time_correlation, map_shape)
        significance_corr_reshape = np.reshape(significance, map_shape)

        mean_time_corr = np.nanmean(pred_t_corr_reshape)
        mean_pattern_corr = np.nanmean(pattern_corr_values)
        logger.info(f'time correlation: {mean_time_corr}')
        logger.info(f'pattern correlation: {mean_pattern_corr}')

        logger.info("Plot and save variables")
        ex = ExportVarPlot(output_label, cl_config)
        ex.save_plot_and_time_correlationNN(
            forecast_nn.list_precursors, predictand, pred_t_corr_reshape,
            significance_corr_reshape, forecast_nn.list_precursors_all,
            mean_time_corr, nr_neurons, opt_method, nr_epochs, nr_layers,
            lr_rate, nr_batch_size)

        # Append one record per evaluation; the header is written only when
        # the file is still empty.
        df_parameters_opt = pd.DataFrame(
            {
                "precursor": ex.predictor_names,
                "nr_neurons": nr_neurons,
                "opt_method": opt_method,
                "nr_epochs": nr_epochs,
                "nr_layers": nr_layers,
                "lr_rate": lr_rate,
                "nr_batch_size": nr_batch_size,
                "time_correlation": mean_time_corr,
                "pattern_correlation": mean_pattern_corr,
            },
            index=[index_df])
        filename = f'output-{output_label}/skill_correlation-{predictand.var}-opt-sim.csv'
        with open(filename, 'a') as f:
            df_parameters_opt.to_csv(f, header=f.tell() == 0)
        index_df += 1
        return time_correlation
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Score the composite forecast for every combination of precursors.

    For each entry of ``forecast.list_precursors_combinations`` every test
    sample is predicted, pattern and time correlations are computed and, if
    plotting is enabled, the maps and skill values are exported.

    NOTE(review): the whole export section is placed under
    ``if forecast.plot`` because it depends on ``ex`` - confirm.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel``, ``outputpath`` and
        ``datarange``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast_nn model")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']
    data_range = arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)
    dict_skills_pattern = {}

    # Clustering set-up: Ward linkage with five clusters.
    k = 5
    method_name = 'ward'
    forecast = Forecast(inifile, cl_config, k, method_name)
    logger.info("Clusters: " + str(forecast.k))

    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Split predictand and precursors as described by data_range.
    # noinspection PyPep8Naming
    split = train_test_split_pred(predictand, precursors, data_range)
    y_train, X_train, y_test, X_test = split

    # Cluster the predictand on the training data, then build composites.
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast.method_name,
                                                 forecast.k)
    precursors.get_composites_data_1d_train_test(X_train, predictand.f,
                                                 forecast.k,
                                                 forecast.method_name,
                                                 predictand.var)

    index_df = 0
    for forecast_predictands in forecast.list_precursors_combinations:
        forecast.list_precursors = forecast_predictands

        pattern_corr_values = []
        observations = y_test[f"{predictand.var}"]
        n_points = predictand.dict_pred_1D[f"{predictand.var}"].shape[1]
        forecast_data = np.zeros((len(observations), n_points))
        logger.info(forecast_predictands)

        # Predict each test sample and correlate it with the observation.
        for year, observed in enumerate(observations):
            predicted = forecast.prediction(predictand.clusters,
                                            precursors.dict_composites,
                                            X_test, year)
            forecast_data[year] = predicted
            pattern_corr_values.append(
                stats.pearsonr(predicted, observed)[0])

        # Time correlation for each point over all test samples.
        correlation_result = forecast.calculate_time_correlation_all_times(
            np.array(observations), forecast_data)
        time_correlation, significance = correlation_result

        # Reshape the flat correlation vectors to the predictand map.
        field = predictand.dict_predict[predictand.var]
        map_shape = (field.shape[1], field.shape[2])
        pred_t_corr_reshape = np.reshape(time_correlation, map_shape)
        significance_corr_reshape = np.reshape(significance, map_shape)

        mean_time_corr = np.nanmean(pred_t_corr_reshape)
        mean_pattern_corr = np.nanmean(pattern_corr_values)
        logger.info(f'time correlation: {mean_time_corr}')
        logger.info(f'pattern correlation: {mean_pattern_corr}')

        # Export maps and skill values only if requested in the ini-file.
        if forecast.plot:
            logger.info("Plot and save variables")
            ex = ExportVarPlot(output_label, cl_config)
            ex.save_plot_and_time_correlation(forecast.list_precursors,
                                              predictand,
                                              pred_t_corr_reshape,
                                              significance_corr_reshape,
                                              forecast.list_precursors_all,
                                              mean_time_corr)

            dict_skills_pattern[ex.predictor_names] = {
                'time correlation': mean_time_corr,
                'pattern correlation': mean_pattern_corr
            }
            skill_record = {
                "precursor": ex.predictor_names,
                "time_correlation": mean_time_corr,
                "pattern_correlation": mean_pattern_corr,
            }
            df_parameters_opt = pd.DataFrame(skill_record, index=[index_df])

            # One CSV per combination; the header is written only when the
            # file is still empty.
            filename = f'output-{output_label}/skill_correlation-{predictand.var}-{index_df}-opt.csv'
            with open(filename, 'a') as f:
                df_parameters_opt.to_csv(f, header=f.tell() == 0)
            index_df += 1

    # Dump the collected skills of all combinations as one JSON file.
    if forecast.plot:
        with open(
                f'{output_path}/output-{output_label}/skill_correlation-{predictand.var}.json',
                'w') as fp:
            json.dump(dict_skills_pattern, fp)
class TestForecastNN(unittest.TestCase):
    """Test fixture for the ForecastNN class based on a Gaussian toy model.

    NOTE(review): the class defines helpers only; no method name starts
    with ``test``, so a test runner has nothing to execute here.
    """

    def setUp(self):
        """Initialize predictand, precursors and forecast, then load toy data."""
        # cluster
        cl_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/clusters_America_prec_t_test.ini"
        cl_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        cl_output_label = "TEST"
        cl_config = Config("Test.log")
        self.predictand = Predictand(cl_inifile, cl_output_path,
                                     cl_output_label, cl_config.config_dict)

        # composite
        co_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/composites_America_PSL.ini"
        co_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        co_output_label = "TEST"
        co_config = Config("Test.log")
        self.precursors = Precursors(co_inifile, co_output_path,
                                     co_output_label, co_config.config_dict)

        # set cluster method parameters
        self.method_name = "ward"
        self.k = 2
        self.predictand_var = "prec_t"

        # initialize Forecast class
        self.forecast_nn = ForecastNN(cl_inifile, cl_config.config_dict,
                                      self.k, self.method_name)
        self.initialize_data()

    def initialize_data(self):
        """Replace the loaded data by samples of a Gaussian mixture toy model."""
        # Two narrow Gaussians, one per composite. Of the four components
        # the first two are used as "snow" and the last two as "ice".
        self.gaussian_distributions = [
            {
                "mean": [-1, 1, 1, -1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
            {
                "mean": [-1, 0, 1, 1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
        ]

        # create time series
        self.t_end = 5000
        self.time_series = range(self.t_end)

        # draw the precursor samples from the mixture model
        precursors = MixtureGaussianModel(self.gaussian_distributions)
        self.X = (precursors.rvs(self.t_end))

        # Array handed to the predictand toy model together with the
        # clusters. ``np.float`` no longer exists in current NumPy; the
        # builtin ``float`` is the type it used to alias.
        self.array = np.array(
            [[1, 2, 1, 1], [-0.5, 0, -0.5, 1.], [-1, 0, -1, -1]],
            dtype=float)
        self.prcp_clusters = [{"cluster": [1, -1, 1]},
                              {"cluster": [1, 1, -1]}]
        self.prcp = PredictandToyModel(self.prcp_clusters, self.array)
        self.y = self.prcp.get_data_from_precursors(self.X)

        # set data to predictand input arrays
        self.predictand.dict_standardized_pred_1D[self.predictand.var] = self.y
        self.predictand.dict_pred_1D[self.predictand.var] = self.y

        # set data to precursors input data
        self.precursors.dict_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_standardized_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_prec_1D["snow"] = self.X[:, :2]
        self.precursors.dict_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_standardized_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_prec_1D["ice"] = self.X[:, 2:]

        # drop the precursor that was loaded from the ini-file
        self.precursors.dict_standardized_precursors.pop("PSL")
        self.precursors.dict_prec_1D.pop("PSL")
        self.precursors.dict_precursors.pop("PSL")

        # split into training and test data
        self.y_train, self.X_train, self.y_test, self.X_test = \
            self.train_test_split_pred(self.predictand, self.precursors,
                                       test_size=0.66, random_state=2019)

    @staticmethod
    def train_test_split_pred(predictand,
                              precursors,
                              test_size=0.66,
                              random_state=2019):
        """Split predictand and precursor samples randomly into two sets.

        Despite its name, ``test_size`` is the fraction of time steps that
        ends up in the *training* dictionaries; the remaining time steps
        form the test dictionaries.

        :param predictand: object providing ``var`` and ``dict_pred_1D``
        :param precursors: object providing ``dict_precursors`` and
            ``dict_prec_1D``
        :param test_size: fraction of time steps drawn for the training set
        :param random_state: seed for NumPy's global random generator
        :return: tuple ``(y_train, X_train, y_test, X_test)`` of
            dictionaries mapping variable name to a list of samples
        """
        np.random.seed(random_state)
        var = predictand.var
        pred_data = predictand.dict_pred_1D[var]
        len_predicts = len(pred_data)
        len_selected = int(len_predicts * test_size)
        # A set gives O(1) membership tests in the loop below.
        selected_time_steps = set(
            np.random.choice(len_predicts, len_selected,
                             replace=False).tolist())

        y_train = {}
        # noinspection PyPep8Naming
        X_train = {}
        y_test = {}
        # noinspection PyPep8Naming
        X_test = {}

        for i in range(len_predicts):
            if i in selected_time_steps:
                y_target, x_target = y_train, X_train
            else:
                y_target, x_target = y_test, X_test
            y_target.setdefault(var, []).append(pred_data[i])
            for prec in precursors.dict_precursors:
                x_target.setdefault(prec, []).append(
                    precursors.dict_prec_1D[prec][i])
        return y_train, X_train, y_test, X_test

    def calculate_clusters_and_composites(self):
        """Cluster the training predictand and build the precursor composites."""
        self.calculate_clusters_from_test_data(self.y_train, self.method_name,
                                               self.k)
        self.precursors.get_composites_data_1d_train_test(
            self.X_train, self.predictand.f, self.k, self.method_name,
            self.predictand_var)

    def calculate_forecast(self):
        """Calculate forecast_nn using toy model data."""
        self.calculate_clusters_and_composites()
        self.forecast_nn.list_precursors_all = ["snow", "ice"]
        self.forecast_nn.list_precursors_combinations = [["snow"], ["ice"],
                                                         ["snow", "ice"]]

        # For this test both precursors are used; train on the training data.
        self.forecast_nn.train_nn(self.forecast_nn.list_precursors_all,
                                  self.predictand.clusters,
                                  self.precursors.dict_composites,
                                  self.X_train,
                                  self.y_train[self.predictand_var])

        observations = self.y_test[self.predictand.var]
        self.forecast_data = np.zeros(
            (len(observations),
             self.predictand.dict_pred_1D[f"{self.predictand.var}"].shape[1]))
        self.pattern_corr_values = []

        # Predict every test sample and store its rounded pattern correlation.
        for year in range(len(observations)):
            forecast_temp = self.forecast_nn.prediction_nn(
                self.forecast_nn.list_precursors_all,
                self.predictand.clusters, self.precursors.dict_composites,
                self.X_test, year)
            self.forecast_data[year] = forecast_temp
            self.pattern_corr_values.append(
                round(
                    stats.pearsonr(self.forecast_data[year],
                                   observations[year])[0]))

        # Round observations and forecasts in place for correlation analysis.
        for j, observed_row in enumerate(observations):
            for i in range(len(observed_row)):
                observed_row[i] = round(observed_row[i])
                self.forecast_data[j][i] = round(self.forecast_data[j][i])

    def calculate_clusters_from_test_data(self, train_data: dict,
                                          method_name: str, k: int):
        """
        calculate clusters for predictand variable
        :param train_data: cluster data which should be used to calculate clusters
        :param method_name: name of the method used for clustering
        :param k: number of clusters
        """
        print('Calculate clusters')
        self.predictand.dict_standardized_pred_1D = train_data
        self.predictand._set_method_name(method_name)
        self.predictand._set_k(k)
        self.predictand._set_linkage()
        self.predictand._set_f()
        self.predictand._cluster_frequency()
        self.predictand._set_clusters_1d()
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Search the precursor area that maximises the mean time correlation.

    The predictand is clustered once on the training data. ``skill`` then
    evaluates a candidate cut area of the first forecast precursor and
    ``dual_annealing`` minimises its return value.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel``, ``outputpath``, ``datarange``
        and ``forecast_precursors``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast model opt")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']
    data_range = arguments['datarange']
    logger.info(inifile)
    logger.info(output_path)
    logger.info(output_label)
    logger.info(data_range)

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with six clusters.
    method_name = 'ward'
    k = 6
    forecast = Forecast(inifile, cl_config, k, method_name)
    logger.info("Clusters: " + str(forecast.k))

    precursors = Precursors(inifile, output_path, output_label, cl_config)
    forecast_precursors = arguments['forecast_precursors']
    logger.info(forecast_precursors)

    # Only the predictand part of this split is used; the precursors are
    # split again inside skill() after the area has been cut.
    y_train, _, y_test, _ = train_test_split_pred(predictand, precursors,
                                                  data_range,
                                                  forecast_precursors)

    # Cluster the predictand using the training samples only.
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast.method_name,
                                                 forecast.k)

    def skill(x, info):
        """Objective for the optimiser: negative mean time correlation.

        :param x: candidate cut area, passed unchanged to
            ``set_area_composite_opt``
        :param info: mutable bookkeeping dict with the keys ``Nfeval``
            (number of completed evaluations) and ``best_values`` (highest
            mean time correlation seen so far)
        :return: ``-mean time correlation``, or 1 when the area is invalid
            or no correlation could be computed
        """
        # Penalise invalid areas with a value no valid area can exceed.
        if lat_range(x) < 0 or lon_range(x) < 0:
            return 1

        cut_area_opt = x
        precursors.set_area_composite_opt(forecast_precursors[0],
                                          cut_area_opt)

        # Split the re-cut precursors into training and test data.
        # noinspection PyPep8Naming
        X_train, X_test = train_test_split_prec(precursors, data_range,
                                                forecast_precursors)

        # Composites for the candidate area.
        precursors.get_composites_data_1d_train_test(X_train, predictand.f,
                                                     forecast.k,
                                                     forecast.method_name,
                                                     predictand.var)
        forecast.list_precursors = forecast_precursors
        logger.info(forecast_precursors)

        # The composite does not change while predicting, so the NaN check
        # is done once instead of once per test sample.
        first_composite = precursors.dict_composites[forecast_precursors[0]][0]
        if math.isnan(np.nanmean(first_composite)):
            return 1

        observations = y_test[f"{predictand.var}"]
        forecast_data = np.zeros(
            (len(observations),
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        for year in range(len(observations)):
            forecast_data[year] = forecast.prediction(
                predictand.clusters, precursors.dict_composites, X_test,
                year)

        # Time correlation for each point over all test samples.
        time_correlation, _ = forecast.calculate_time_correlation_all_times(
            np.array(observations), forecast_data)
        time_correlation_mean = np.nanmean(time_correlation)

        # The optimiser minimises -correlation, so the best result is the
        # highest correlation: log whenever a new maximum is reached.
        if info['best_values'] <= time_correlation_mean:
            info['best_values'] = time_correlation_mean
            logger.info(f"{x[0]:4f} {x[1]:4f} {x[2]:4f} {x[3]:4f} {time_correlation_mean} {info['Nfeval']}")
        info['Nfeval'] += 1

        if math.isnan(time_correlation_mean):
            return 1.
        return -time_correlation_mean

    # Search space per element of x; each lower/upper pair is kept at least
    # 10 units away from the opposite edge of the precursor domain.
    var = forecast_precursors[0]
    bounds = [(precursors.lat_min[var], precursors.lat_max[var] - 10),
              (precursors.lat_min[var] + 10, precursors.lat_max[var]),
              (precursors.lon_min[var], precursors.lon_max[var] - 10),
              (precursors.lon_min[var] + 10, precursors.lon_max[var])]

    # Start the tracker at -inf so that the first valid evaluation is
    # always recorded as the current best.
    res = dual_annealing(skill,
                         bounds,
                         args=({'Nfeval': 0,
                                'best_values': float('-inf')},),
                         maxiter=5000)
    print(res)
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Run a talos hyper-parameter scan for the neural-network forecast.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel``, ``outputpath``, ``datarange``
        and ``forecast_precursors``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast_nn model")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']
    data_range = arguments['datarange']

    predictand = Predictand(inifile, output_path, output_label, cl_config)
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with five clusters.
    method_name = 'ward'
    k = 5

    # ForecastNN (and with it tensorflow) is imported only now: according
    # to the original author the netcdf loading of xarray does not work
    # when tensorflow is imported at the beginning.
    from classes.ForecastNN import ForecastNN
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # Split predictand and precursors as described by data_range.
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Cluster the predictand using the training samples only.
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast_nn.method_name,
                                                 forecast_nn.k)

    # Build the precursor composites.
    precursors.get_composites_data_1d_train_test(X_train, X_test,
                                                 predictand.f, forecast_nn.k,
                                                 forecast_nn.method_name,
                                                 predictand.var)

    forecast_precursors = arguments['forecast_precursors']
    logger.info(forecast_precursors)
    forecast_nn.list_precursors = forecast_precursors

    dict_calc_X_y = {
        'composites_1d': precursors.dict_composites,
        'forecast_predictands': forecast_nn.list_precursors,
        'clusters_1d': predictand.clusters,
    }

    # The y arrays are pseudo-values: element 0 holds the time step of the
    # correct forecast variable. This is needed because the training
    # targets must have the same dimension as the network output (the
    # beta-values); the real comparison is done elsewhere.
    alphas_train, alphas_val, y_train_pseudo, y_val_pseudo = \
        forecast_nn.calc_alphas_for_talos(X_train, y_train[predictand.var],
                                          dict_calc_X_y)
    len_alpha = len(alphas_train)

    # Parameter space handed to talos; every value is a list of candidates.
    # Besides the hyper-parameters the dict also carries the data and
    # settings listed below to the model function.
    p = {
        'lr': [0.0001],
        'activation': ['relu'],
        'kernel_initializer': ['random_uniform'],
        'optimizer': ['Adam'],
        'losses': ['logcosh'],
        'shapes': ['brick'],
        'first_neuron': [5],
        'forecast_predictands': [forecast_nn.list_precursors],
        'len_alpha': [len_alpha],
        'hidden_layers': [3],
        'dropout': [.1],
        'batch_size': [64],
        'epochs': [95, 105],
        'last_activation': ['linear'],
        'y_train': [y_train[predictand.var]],
        'x_test': [X_test],
        'y_test': [y_test[predictand.var]],
        'composites_1d': [precursors.dict_composites],
        'precursor': [precursors.var],
        'pattern_corr': [1],
        'time_corr': [1],
        'taylor_skill': [0],
    }

    # talos is imported late for the same reason as ForecastNN above.
    import talos as ta
    ta.Scan(x=alphas_train,
            y=y_train_pseudo,
            x_val=alphas_val,
            y_val=y_val_pseudo,
            model=forecast_nn.train_nn_talos,
            params=p,
            experiment_name='opt-nn-clustering')
def main(cl_parser: ClusteringParser, cl_config: dict):
    """Search the precursor area that maximises a forecast skill score.

    The predictand is clustered once on the training data. ``skill`` then
    evaluates a candidate cut area of the first forecast precursor and
    ``shgo`` minimises its return value.

    :param cl_parser: parsed command line; its ``arguments`` mapping must
        contain ``inifile``, ``outputlabel``, ``outputpath``, ``datarange``
        and ``forecast_precursors``
    :param cl_config: configuration dictionary handed to every model class
    """
    logger.info("Start forecast model opt")

    # The complete ini-file is given on the command line, so no separate
    # per-variable lookup is needed.
    arguments = cl_parser.arguments
    inifile = arguments['inifile']
    output_label = arguments['outputlabel']
    output_path = arguments['outputpath']
    data_range = arguments['datarange']
    logger.info(inifile)
    logger.info(output_path)
    logger.info(output_label)
    logger.info(data_range)

    predictand = Predictand(inifile, output_path, output_label, cl_config)

    # Clustering set-up: Ward linkage with five clusters.
    method_name = 'ward'
    k = 5
    forecast = Forecast(inifile, cl_config, k, method_name)
    logger.info("Clusters: " + str(forecast.k))

    precursors = Precursors(inifile, output_path, output_label, cl_config)
    forecast_precursors = arguments['forecast_precursors']
    logger.info(forecast_precursors)

    # Only the predictand part of this split is used; the precursors are
    # split again inside skill() after the area has been cut.
    y_train, _, y_test, _ = train_test_split_pred(predictand, precursors,
                                                  data_range,
                                                  forecast_precursors)

    # Cluster the predictand using the training samples only.
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast.method_name,
                                                 forecast.k)

    def skill(x, info):
        """Objective for the optimiser: one minus the mean skill score.

        :param x: two optimisation variables that fill the first two
            entries of the cut area
        :param info: mutable bookkeeping dict with the keys ``Nfeval``
            (number of completed evaluations) and ``best_values`` (highest
            mean skill score seen so far)
        :return: ``1 - mean skill score``, or 1 when the area is invalid or
            the first composite contains no valid value
        """
        # NOTE(review): x has only two elements here - confirm that
        # lat_range accepts such a vector.
        if lat_range(x) < 0 or lon_range(x) < 0:
            return 1

        # NOTE(review): x is bounded by the longitude range of the
        # precursor but fills the first two slots, while 65 and 90 fill the
        # last two - confirm the order set_area_composite_opt expects.
        cut_area_opt = [x[0], x[1], 65, 90]
        # cut area and normalize data accordingly
        precursors.set_area_composite_opt(forecast_precursors[0],
                                          cut_area_opt)

        # Split the re-cut precursors into training and test data.
        # noinspection PyPep8Naming
        X_train, X_test = train_test_split_prec(precursors, data_range,
                                                forecast_precursors)

        # Composites for the candidate area.
        precursors.get_composites_data_1d_train_test(X_train, X_test,
                                                     predictand.f,
                                                     forecast.k,
                                                     forecast.method_name,
                                                     predictand.var)
        forecast.list_precursors = forecast_precursors

        # The composite does not change while predicting, so the NaN check
        # is done once instead of once per test sample.
        first_composite = precursors.dict_composites[forecast_precursors[0]][0]
        if math.isnan(np.nanmean(first_composite)):
            return 1

        observations = y_test[f"{predictand.var}"]
        n_samples = len(observations)
        forecast_data = np.zeros(
            (n_samples,
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        skills_score_predictor = np.zeros(n_samples)
        ax_predictor = np.zeros(n_samples)
        corr_predictor = np.zeros(n_samples)

        for year in range(n_samples):
            forecast_data[year] = forecast.prediction(
                predictand.clusters, precursors.dict_composites, X_test,
                year)

            # Ratio of forecast to observed standard deviation and the
            # pattern correlation of this sample.
            a_xm = np.std(forecast_data[year])
            a_xo = np.std(observations[year])
            ax = a_xm / a_xo
            corr = stats.pearsonr(observations[year], forecast_data[year])[0]

            # Skill score combining correlation and variance ratio; it has
            # the form of the Taylor (2001) score with R0 = 1 - confirm.
            skills_score_predictor[year] = (4 * (1 + corr)) / (
                2 * (ax + 1 / ax)**2)
            ax_predictor[year] = ax
            corr_predictor[year] = corr

        # Time correlation for each point over all test samples; only
        # logged, the optimisation target is the skill score.
        time_correlation, _ = forecast.calculate_time_correlation_all_times(
            np.array(observations), forecast_data)

        skills_score_mean = np.nanmean(skills_score_predictor)
        ax_mean = np.nanmean(ax_predictor)
        corr_mean = np.nanmean(corr_predictor)
        time_correlation_mean = np.nanmean(time_correlation)

        # Log whenever a new best (highest) skill score is reached.
        if info['best_values'] <= skills_score_mean:
            info['best_values'] = skills_score_mean
            logger.info(
                f"{x[0]:4f} {x[1]:4f} {time_correlation_mean:4f} {skills_score_mean:4f} "
                f"{corr_mean:4f} {ax_mean:4f} {info['Nfeval']}")
        info['Nfeval'] += 1

        return 1 - skills_score_mean

    cons = ({'type': 'ineq', 'fun': lon_range}, )
    var = forecast_precursors[0]
    # Search space of the two optimisation variables; each is kept at
    # least 180 units away from the opposite edge of the longitude range.
    bounds = [(precursors.lon_min[var], precursors.lon_max[var] - 180),
              (precursors.lon_min[var] + 180, precursors.lon_max[var])]

    # Log lines kept from earlier runs:
    # 2020-07-18 19:12:16,756 - __main__ - INFO - 27.500000 82.500000 43.750000 316.250000 0.29794461371540987 0.17793906843190135 261
    # 2020-07-19 16:18:40,972 - __main__ - INFO - 50.000000 67.500000 175.000000 316.250000 0.284150 0.502431 0.259510 0.948210 868
    res = shgo(skill,
               bounds,
               args=({
                   'Nfeval': 0,
                   'best_values': 0
               }, ),
               constraints=cons,
               iters=5000)
    print(res)