Example #2
def main(cl_parser: ClusteringParser, cl_config: dict):
    logger.info("Start forecast_nn model")

    # load inifile according to variable
    # var = cl_parser.arguments['predictand']  # no longer needed; the full inifile is given
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']
    predictand = Predictand(inifile, output_path, output_label, cl_config)
    dict_skills_pattern = {}

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config, predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Create train and test dataset with a 66:33 split
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(y_train, forecast_nn.method_name, forecast_nn.k)
    # Calculate composites
    precursors.get_composites_data_1d_train_test(X_train, predictand.f, forecast_nn.k, forecast_nn.method_name,
                                                 predictand.var)
    # precursors.plot_composites(k, 1)
    # subtract train mean also for test data
    # for prec in forecast_nn.list_precursors_all:
    #     X_test[prec] -= precursors.varmean
    # y_test[predictand.var] -= predictand.varmean
    # df_parameters_opt = pd.DataFrame(columns=["precursor", "nr_neurons", "opt_method", "nr_epochs", "nr_layers", "lr_rate",
    #                                           "nr_batch_size", "time_correlation", "pattern_correlation"])

    nr_epochs = 500
    # Calculate forecast_nn for all years
    # for forecast_predictands in forecast_nn.list_precursors_combinations:
    #     forecast_nn.list_precursors = forecast_predictands
    forecast_nn.list_precursors = ["Z500"]
    list_methods = ["SGD", "Adam"]
    forecast_predictands = forecast_nn.list_precursors


    index_df = 0

    def objective(opt_m, nr_batch_size, lr_rate, nr_layers, nr_neurons):
        nonlocal index_df  # index_df is rebound below, so it must be declared nonlocal
        opt_method = "Adam"  # note: the opt_m argument is currently ignored
        # train small NN
        forecast_nn.train_nn_opt(forecast_nn.list_precursors, predictand.clusters,
                                 precursors.dict_composites, X_train,
                                 y_train[f"{predictand.var}"], nr_neurons, opt_method,
                                 nr_epochs, nr_layers,
                                 lr_rate, nr_batch_size)

        # Calculate forecast_nn for all years
        pattern_corr_values = []

        # Prediction
        forecast_data = np.zeros((len(y_test[f"{predictand.var}"]),
                                  predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        logger.info(forecast_predictands)

        for year in range(len(y_test[predictand.var])):
            print(year)
            forecast_temp = forecast_nn.prediction_nn(forecast_nn.list_precursors_all,
                                                      predictand.clusters,
                                                      precursors.dict_composites, X_test, year)
            # Assign forecast_nn data to array
            forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            pattern_corr_values.append(
                stats.pearsonr(forecast_temp, y_test[f"{predictand.var}"][year])[0])

        # Calculate time correlation for each point
        time_correlation, significance = forecast_nn.calculate_time_correlation_all_times(
            np.array(y_test[f"{predictand.var}"]), forecast_data)

        # Reshape correlation maps
        pred_t_corr_reshape = np.reshape(time_correlation,
                                         (predictand.dict_predict[predictand.var].shape[1],
                                          predictand.dict_predict[predictand.var].shape[2]))
        significance_corr_reshape = np.reshape(significance, (
            predictand.dict_predict[predictand.var].shape[1],
            predictand.dict_predict[predictand.var].shape[2]))

        logger.info(f'time correlation: {np.nanmean(pred_t_corr_reshape)}')
        logger.info(f'pattern correlation: {np.nanmean(pattern_corr_values)}')

        logger.info("Plot and save variables")
        ex = ExportVarPlot(output_label, cl_config)

        ex.save_plot_and_time_correlationNN(forecast_nn.list_precursors, predictand,
                                            pred_t_corr_reshape,
                                            significance_corr_reshape,
                                            forecast_nn.list_precursors_all,
                                            np.nanmean(pred_t_corr_reshape),
                                            nr_neurons, opt_method, nr_epochs,
                                            nr_layers, lr_rate, nr_batch_size)
        df_parameters_opt = pd.DataFrame({"precursor": ex.predictor_names,
                                          "nr_neurons": nr_neurons,
                                          "opt_method": opt_method,
                                          "nr_epochs": nr_epochs,
                                          "nr_layers": nr_layers,
                                          "lr_rate": lr_rate,
                                          "nr_batch_size": nr_batch_size,
                                          "time_correlation": np.nanmean(pred_t_corr_reshape),
                                          "pattern_correlation": np.nanmean(pattern_corr_values)},
                                         index=[index_df])
        filename = f'output-{output_label}/skill_correlation-{predictand.var}-opt-sim.csv'
        with open(filename, 'a') as f:
            # write the header only when the file is still empty
            df_parameters_opt.to_csv(f, header=f.tell() == 0)
            index_df += 1
        return time_correlation
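
    # `objective` is defined above but never invoked in this example; it is
    # shaped like a black-box target for a hyperparameter optimizer. Below is
    # a minimal sketch of a caller, assuming the bayes_opt package and
    # illustrative parameter bounds (both assumptions, not part of this
    # repository). bayes_opt proposes floats and expects a scalar score, so
    # the integer-valued parameters are cast and the correlation map is
    # reduced to its mean.
    from bayes_opt import BayesianOptimization

    def objective_scalar(nr_batch_size, lr_rate, nr_layers, nr_neurons):
        time_corr = objective("Adam", int(round(nr_batch_size)), lr_rate,
                              int(round(nr_layers)), int(round(nr_neurons)))
        return float(np.nanmean(time_corr))

    optimizer = BayesianOptimization(f=objective_scalar,
                                     pbounds={"nr_batch_size": (8, 128),
                                              "lr_rate": (1e-4, 1e-2),
                                              "nr_layers": (1, 4),
                                              "nr_neurons": (4, 64)},
                                     random_state=2019)
    optimizer.maximize(init_points=2, n_iter=10)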
Example #3
def main(cl_parser: ClusteringParser, cl_config: dict):
    logger.info("Start forecast_nn model opt")

    # load inifile according to variable
    # var = cl_parser.arguments['predictand']  # no longer needed; the full inifile is given
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']
    predictand = Predictand(inifile, output_path, output_label, cl_config)
    dict_skills_pattern = {}

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # Create train and test dataset with a 66:33 split
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast_nn.method_name,
                                                 forecast_nn.k)
    # ~ predictand.plot_composites(forecast_nn.k, 0.00001)

    # Calculate composites
    precursors.get_composites_data_1d_train_test(X_train, predictand.f,
                                                 forecast_nn.k,
                                                 forecast_nn.method_name,
                                                 predictand.var)
    # precursors.plot_composites(k, 1)
    # subtract train mean also for test data
    # for prec in forecast_nn.list_precursors_all:
    #     X_test[prec] -= precursors.varmean
    # y_test[predictand.var] -= predictand.varmean

    for forecast_predictands in forecast_nn.list_precursors_combinations:
        # Calculate forecast_nn for all years
        forecast_nn.list_precursors = forecast_predictands

        # train small NN
        forecast_nn.train_nn(forecast_nn.list_precursors, predictand.clusters,
                             precursors.dict_composites, X_train,
                             y_train[f"{predictand.var}"])

        # Calculate forecast_nn for all years
        pattern_corr_values = []

        # Prediction
        forecast_data = np.zeros(
            (len(y_test[f"{predictand.var}"]),
             predictand.dict_pred_1D[f"{predictand.var}"].shape[1]))
        logger.info(forecast_predictands)

        for year in range(len(y_test[predictand.var])):
            print(year)
            forecast_temp = forecast_nn.prediction_nn_model(
                forecast_nn.list_precursors_all, predictand.clusters,
                precursors.dict_composites, X_test, year)
            # Assign forecast_nn data to array
            forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            # remove zeros from array
            # forecast_temp = forecast_temp[forecast_temp != 0]
            # obs_temp = y_test[f"{predictand.var}"][year][y_test[f"{predictand.var}"][year] != 0]
            pattern_corr_values.append(
                stats.pearsonr(forecast_temp,
                               y_test[f"{predictand.var}"][year])[0])

        # Calculate time correlation for each point
        time_correlation, significance = forecast_nn.calculate_time_correlation_all_times(
            np.array(y_test[f"{predictand.var}"]), forecast_data)

        # Reshape correlation maps
        pred_t_corr_reshape = np.reshape(
            time_correlation,
            (predictand.dict_predict[predictand.var].shape[1],
             predictand.dict_predict[predictand.var].shape[2]))
        significance_corr_reshape = np.reshape(
            significance, (predictand.dict_predict[predictand.var].shape[1],
                           predictand.dict_predict[predictand.var].shape[2]))

        logger.info(f'time correlation: {np.nanmean(pred_t_corr_reshape)}')
        logger.info(f'pattern correlation: {np.nanmean(pattern_corr_values)}')

        # Plot correlation map, if specified in ini-file
        if forecast_nn.plot:
            logger.info("Plot and save variables")
            ex = ExportVarPlot(output_label, cl_config)
            ex.save_plot_and_time_correlation(forecast_nn.list_precursors,
                                              predictand, pred_t_corr_reshape,
                                              significance_corr_reshape,
                                              forecast_nn.list_precursors_all,
                                              np.nanmean(pred_t_corr_reshape))
            dict_skills_pattern[ex.predictor_names] = {
                'time correlation': np.nanmean(pred_t_corr_reshape),
                'pattern correlation': np.nanmean(pattern_corr_values)
            }
    if forecast_nn.plot:
        with open(
                f'{output_path}/output-{output_label}/skill_correlation-{predictand.var}.json',
                'w') as fp:
            json.dump(dict_skills_pattern, fp)
class TestForecastNN(unittest.TestCase):
    """ Create test class for Forcast"""
    def setUp(self):
        """initialize class cluster and composites"""
        # cluster
        cl_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/clusters_America_prec_t_test.ini"
        cl_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        cl_output_label = "TEST"
        cl_config = Config("Test.log")
        self.predictand = Predictand(cl_inifile, cl_output_path,
                                     cl_output_label, cl_config.config_dict)
        # composite
        co_inifile = "/home/sonja/Documents/Clustering-Forecast/ini/composites_America_PSL.ini"
        co_output_path = "/home/sonja/Documents/Clustering-Forecast/tests/"
        co_output_label = "TEST"
        co_config = Config("Test.log")
        self.precursors = Precursors(co_inifile, co_output_path,
                                     co_output_label, co_config.config_dict)

        # set cluster method parameters
        self.method_name = "ward"
        self.k = 2
        self.predictand_var = "prec_t"
        # initialize Forecast class
        self.forecast_nn = ForecastNN(cl_inifile, cl_config.config_dict,
                                      self.k, self.method_name)

        self.initialize_data()

    def initialize_data(self):
        """ initialize toy data to test algorithm"""
        # create data for the two different composites
        # first two are snow data and second two data points are ice data
        self.gaussian_distributions = [
            {
                "mean": [-1, 1, 1, -1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
            {
                "mean": [-1, 0, 1, 1],
                "sigma": [[0.00001, 0., 0., 0.], [0., 0.00001, 0., 0.],
                          [0., 0., 0.00001, 0.], [0., 0., 0., 0.00001]]
            },
        ]
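        # note: the tiny diagonal covariances (1e-5) make each mixture
        # component nearly deterministic, so the toy clusters are cleanly
        # separable and easy to verify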

        # create time series
        self.t_end = 5000
        self.time_series = range(self.t_end)

        # create instance to get samples for sic and sce
        precursors = MixtureGaussianModel(self.gaussian_distributions)
        # get samples
        self.X = (precursors.rvs(self.t_end))

        # array which, together with the composites, leads to the clusters of PRCP
        self.array = np.array(
            [[1, 2, 1, 1], [-0.5, 0, -0.5, 1.], [-1, 0, -1, -1]], float)
        self.prcp_clusters = [{"cluster": [1, -1, 1]}, {"cluster": [1, 1, -1]}]
        self.prcp = PredictandToyModel(self.prcp_clusters, self.array)
        self.y = self.prcp.get_data_from_precursors(self.X)

        # set data to predictand input arrays
        self.predictand.dict_standardized_pred_1D[self.predictand.var] = self.y
        self.predictand.dict_pred_1D[self.predictand.var] = self.y

        # set data to precursors input data
        self.precursors.dict_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_standardized_precursors["snow"] = self.X[:, :2]
        self.precursors.dict_prec_1D["snow"] = self.X[:, :2]
        self.precursors.dict_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_standardized_precursors["ice"] = self.X[:, 2:]
        self.precursors.dict_prec_1D["ice"] = self.X[:, 2:]

        self.precursors.dict_standardized_precursors.pop("PSL")
        self.precursors.dict_prec_1D.pop("PSL")
        self.precursors.dict_precursors.pop("PSL")
        # Create train and test dataset with a 66:33 split
        self.y_train, self.X_train, self.y_test, self.X_test = self.train_test_split_pred(
            self.predictand,
            self.precursors,
            test_size=0.66,
            random_state=2019)

    @staticmethod
    def train_test_split_pred(predictand,
                              precursors,
                              test_size=0.66,
                              random_state=2019):
        np.random.seed(random_state)
        len_predicts = len(predictand.dict_pred_1D[predictand.var])
        len_test_data = int(len_predicts * test_size)
        selected_time_steps = np.random.choice(len_predicts,
                                               len_test_data,
                                               replace=False)
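        # note: the randomly selected fraction (test_size = 0.66) is assigned
        # to the *training* set below, so test_size effectively names the
        # training fraction here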
        y_train = {}
        # noinspection PyPep8Naming
        X_train = {}
        y_test = {}
        # noinspection PyPep8Naming
        X_test = {}

        for i in range(len_predicts):
            if i in selected_time_steps:
                y_train.setdefault(predictand.var, []).append(
                    predictand.dict_pred_1D[predictand.var][i])
                for prec in precursors.dict_precursors.keys():
                    X_train.setdefault(prec, []).append(
                        precursors.dict_prec_1D[prec][i])
            else:
                y_test.setdefault(predictand.var, []).append(
                    predictand.dict_pred_1D[predictand.var][i])
                for prec in precursors.dict_precursors.keys():
                    X_test.setdefault(prec, []).append(
                        precursors.dict_prec_1D[prec][i])
        return y_train, X_train, y_test, X_test

    def calculate_clusters_and_composites(self):
        # Calculate clusters of the predictand from the training data
        self.calculate_clusters_from_test_data(self.y_train, self.method_name,
                                               self.k)

        # Calculate composites
        self.precursors.get_composites_data_1d_train_test(
            self.X_train, self.predictand.f, self.k, self.method_name,
            self.predictand_var)

    def calculate_forecast(self):
        """calculate forecast_nn using toy model data"""
        self.calculate_clusters_and_composites()
        self.forecast_nn.list_precursors_all = ["snow", "ice"]
        self.forecast_nn.list_precursors_combinations = [["snow"], ["ice"],
                                                         ["snow", "ice"]]

        # for this test we use both precursors
        # train the model using the training data
        self.forecast_nn.train_nn(self.forecast_nn.list_precursors_all,
                                  self.predictand.clusters,
                                  self.precursors.dict_composites,
                                  self.X_train,
                                  self.y_train[self.predictand_var])

        self.forecast_data = np.zeros(
            (len(self.y_test[self.predictand.var]),
             self.predictand.dict_pred_1D[f"{self.predictand.var}"].shape[1]))
        # Calculate forecast_nn for all years
        self.pattern_corr_values = []
        # Prediction
        for year in range(len(self.y_test[self.predictand.var])):
            forecast_temp = self.forecast_nn.prediction_nn(
                self.forecast_nn.list_precursors_all, self.predictand.clusters,
                self.precursors.dict_composites, self.X_test, year)
            # Assign forecast_nn data to array
            self.forecast_data[year] = forecast_temp

            # Calculate pattern correlation
            self.pattern_corr_values.append(
                round(
                    stats.pearsonr(self.forecast_data[year],
                                   self.y_test[self.predictand.var][year])[0]))

        # Round data for correlation analysis
        for j in range(len(self.y_test[self.predictand.var])):
            for i in range(len(self.y_test[self.predictand.var][j])):
                self.y_test[self.predictand.var][j][i] = round(
                    self.y_test[self.predictand.var][j][i])
                self.forecast_data[j][i] = round(self.forecast_data[j][i])

    def calculate_clusters_from_test_data(self, train_data: dict,
                                          method_name: str, k: int):
        """
        calculate clusters for predictand variable
        :param train_data: cluster data which should be used to calculate clusters
        :param method_name: name of the method used for clustering
        :param k: number of clusters
        """
        print('Calculate clusters')
        self.predictand.dict_standardized_pred_1D = train_data
        self.predictand._set_method_name(method_name)
        self.predictand._set_k(k)
        self.predictand._set_linkage()
        self.predictand._set_f()
        self.predictand._cluster_frequency()
        self.predictand._set_clusters_1d()
def main(cl_parser: ClusteringParser, cl_config: dict):
    logger.info("Start forecast_nn model")

    # load inifile according to variable
    # var = cl_parser.arguments['predictand']  # no longer needed; the full inifile is given
    inifile = cl_parser.arguments['inifile']
    output_label = cl_parser.arguments['outputlabel']
    output_path = cl_parser.arguments['outputpath']
    data_range = cl_parser.arguments['datarange']
    predictand = Predictand(inifile, output_path, output_label, cl_config)
    dict_skills_pattern = {}

    # load precursors
    precursors = Precursors(inifile, output_path, output_label, cl_config)

    # load forecast_nn-parameters
    method_name = 'ward'
    k = 5

    # Unfortunately, tensorflow (and therefore the ForecastNN class) cannot be
    # imported at the top of the module, because xarray's netcdf load function
    # no longer works once tensorflow has been loaded.
    from classes.ForecastNN import ForecastNN
    forecast_nn = ForecastNN(inifile, output_path, output_label, cl_config,
                             predictand.var, k, method_name)
    logger.info("Clusters: " + str(forecast_nn.k))

    # Create train and test dataset with a 66:33 split
    # noinspection PyPep8Naming
    y_train, X_train, y_test, X_test = train_test_split_pred(
        predictand, precursors, data_range)

    # Calculate clusters of the predictand from the training data
    predictand.calculate_clusters_from_test_data(y_train,
                                                 forecast_nn.method_name,
                                                 forecast_nn.k)
    # Calculate composites
    precursors.get_composites_data_1d_train_test(X_train, X_test, predictand.f,
                                                 forecast_nn.k,
                                                 forecast_nn.method_name,
                                                 predictand.var)

    # precursors.plot_composites(k, 1)
    # subtract train mean also for test data
    # for prec in forecast_nn.list_precursors_all:
    #     X_test[prec] -= precursors.varmean
    # y_test[predictand.var] -= predictand.varmean
    df_parameters_opt = pd.DataFrame(columns=[
        "precursor", "nr_neurons", "opt_method", "nr_epochs", "nr_layers",
        "lr_rate", "nr_batch_size", "time_correlation", "pattern_correlation"
    ])

    # nr_epochs = 500
    # Calculate forecast_nn for all years
    # for forecast_predictands in forecast_nn.list_precursors_combinations:
    #     forecast_nn.list_precursors = forecast_predictands
    forecast_precursors = cl_parser.arguments['forecast_precursors']
    logger.info(forecast_precursors)
    forecast_nn.list_precursors = forecast_precursors
    list_methods = ["SGD", "Adam"]
    forecast_predictands = forecast_nn.list_precursors

    dict_calc_X_y = {
        'composites_1d': precursors.dict_composites,
        'forecast_predictands': forecast_nn.list_precursors,
        'clusters_1d': predictand.clusters,
    }

    # Calculate pseudo-values: element 0 of y holds the time step of the correct
    # forecast variable. This cannot be done differently, because the y-train
    # values must have the same dimension as the output we want to obtain (the
    # beta values); the comparison itself can then be done differently.
    alphas_train, alphas_val, y_train_pseudo, y_val_pseudo = forecast_nn.calc_alphas_for_talos(
        X_train, y_train[predictand.var], dict_calc_X_y)
    len_alpha = len(alphas_train)

    # set the parameter space boundary
    p = {
        # 'lr': [0.01],
        'lr': [0.0001],
        # 'lr': [0.01],
        # 'lr': [0.001],
        # 'activation': ['relu', 'elu'],
        'activation': ['relu'],
        'kernel_initializer': ['random_uniform'],
        # 'optimizer': ['Nadam','Adam','SGD'],
        'optimizer': ['Adam'],
        'losses': ['logcosh'],
        'shapes': ['brick'],
        'first_neuron': [5],
        # 'first_neuron': [5],
        'forecast_predictands': [forecast_nn.list_precursors],
        'len_alpha': [len_alpha],
        # 'hidden_layers': [2, 3],
        'hidden_layers': [3],
        'dropout': [.1],
        # 'dropout': [.1],
        'batch_size': [64],
        # 'batch_size': [5],
        'epochs': [95, 105],  # [800],
        'last_activation': ['linear'],
        'y_train': [y_train[predictand.var]],
        'x_test': [X_test],
        'y_test': [y_test[predictand.var]],
        'composites_1d': [precursors.dict_composites],
        'precursor': [precursors.var],
        'pattern_corr': [1],
        'time_corr': [1],
        'taylor_skill': [0],
    }
    # logger.info(f' precursor: {p["precursor"]}')
    index_df = 0
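    # talos is imported here rather than at the top of the module, presumably
    # for the same tensorflow/xarray import-order issue noted above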
    import talos as ta
    t = ta.Scan(x=alphas_train,
                y=y_train_pseudo,
                x_val=alphas_val,
                y_val=y_val_pseudo,
                model=forecast_nn.train_nn_talos,
                params=p,
                experiment_name='opt-nn-clustering')
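
    # A hedged follow-up, assuming the talos Scan object exposes its round
    # results as a pandas DataFrame via `t.data` (the case in recent talos
    # versions): persist the rounds so the best configuration can be read off.
    t.data.to_csv(f'output-{output_label}/talos-rounds-{predictand.var}.csv',
                  index=False)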