def mean_GPP_nn():
    """Plot the per-day-of-year mean NN GPP prediction with a 90% band across years.

    Averages the MLP ensemble predictions, splits them by year (2000-2012),
    and plots the across-year mean per day of year together with the
    5th-95th percentile envelope.
    """
    preds = mlp_predictions()
    m = np.mean(preds, axis=0)
    X, Y, Y_Preles = preprocessing.preprocessing()
    years = list(range(2000, 2013))
    # One column per year; each year's prediction truncated to 365 days
    # (drops Feb 29 overflow in leap years).
    arr = np.zeros((365, len(years)))
    for i, year in enumerate(years):
        x, y, y_nn = preprocessing.split_by_year(X, Y, m, years=[year])
        arr[:, i] = y_nn[:365]
    fig, ax = plt.subplots(figsize=(8, 6), dpi=100)
    # 5%/95% quantiles across years -> 90% envelope for each day of year.
    CI = np.quantile(arr, (0.05, 0.95), axis=1)
    fig.tight_layout(pad=1.5)
    ax.fill_between(np.arange(365), CI[0], CI[1], color="lightgreen", alpha=0.5)
    # Raw string: the original "\w" was an invalid escape sequence.
    ax.plot(np.arange(365), np.mean(arr, axis=1), color="green",
            label=r"$\widehat{p2}_{m2} - \widehat{p2}_{m1}$",
            linewidth=1.0)
    ax.set_ylabel("GPP [g C m$^{-2}$ day$^{-1}$] ")
    ax.set_xlabel("Day of year")
    ax.set_ylim(-1, 20)
def correlation_ts(var, data, corr="pearson", nn_preds=None):
    """Leave-one-year-out, per-day-of-year correlation of GPP vs. a driver variable.

    For each held-out year, groups the remaining years' data by year,
    aggregates GPP (sum) and `var` (mean) per year via `myfunc`, then
    correlates the two across years for each of 365 days.

    Parameters
    ----------
    var : str
        Column of X to correlate against GPP (e.g. "TAir").
    data : str
        GPP source: "observed" (Y), "preles" (Y_Preles) or "nn" (MLP mean).
    corr : str
        "pearson" (default); any other value uses Spearman.
    nn_preds : np.ndarray, optional
        Precomputed NN predictions; computed via mlp_predictions() when None.

    Returns
    -------
    np.ndarray
        Shape (n_years, 365): one correlation series per held-out year.
    """
    X, Y, Y_Preles = preprocessing.preprocessing()
    # Reuse caller-supplied predictions to avoid retraining.
    preds = nn_preds if nn_preds is not None else mlp_predictions()
    Y_NN = np.mean(preds, axis=0)
    if data == "observed":
        X['GPP'] = Y
    elif data == "preles":
        X['GPP'] = Y_Preles
    elif data == "nn":
        X['GPP'] = Y_NN
    years = list(X['year'].unique())
    stat = pearsonr if corr == "pearson" else spearmanr
    corrs = []
    for held_out in years:
        rest = [yr for yr in years if yr != held_out]
        x, _, _ = preprocessing.split_by_year(X, Y, Y_Preles, years=rest,
                                              drop_year=False)
        # myfunc presumably returns one daily-value array per year group —
        # TODO confirm its contract.
        gpp_series = x.groupby('year')['GPP'].apply(myfunc, agg=("sum"))
        var_series = x.groupby('year')[var].apply(myfunc, agg=("mean"))
        # Stack the per-year arrays into (n_rest_years, 365) matrices directly,
        # replacing the element-by-element copy loops.
        gpp_m = np.stack(gpp_series.to_numpy())
        var_m = np.stack(var_series.to_numpy())
        corrs.append([stat(var_m[:, d], gpp_m[:, d])[0] for d in range(365)])
    return np.array(corrs)
def mlp_predictions(randomsearch=False):
    """Train one MLP per year (2000-2012) and return the concatenated predictions.

    Parameters
    ----------
    randomsearch : bool
        When True, run the (expensive) architecture and hyperparameter
        searches; otherwise reuse the previously found settings
        (layers 7-32-32-16-1, lr 0.005, batch size 16).

    Returns
    -------
    np.ndarray
        Per-year predictions concatenated along axis 1.
    """
    if randomsearch:
        layersizes = random_search.architecture_search()  # previously found: 7,32,32,16,1
        hparams = random_search.hparams_search(layersizes)  # previously found: 0.005, 16
    else:
        layersizes = [7, 32, 32, 16, 1]
        hparams = [0.005, 16]
    hparams_setting = {
        "epochs": 1000,
        "batchsize": hparams[1],
        "learningrate": hparams[0],
        "history": 1,
    }
    model_design = {"layer_sizes": layersizes}
    X, Y, Y_Preles = preprocessing.preprocessing()
    arr = []
    for year in range(2000, 2013):
        x, y, y_nn = preprocessing.split_by_year(X, Y, Y_Preles, years=[year])
        # train() persists the model under id "D1"; its running losses were
        # unused here and are discarded.
        training.train(hparams_setting, model_design,
                       x.to_numpy(), y.to_numpy(), "D1")
        preds, mae, nse = prediction.predict(hparams_setting, model_design,
                                             x.to_numpy(), y.to_numpy(), "D1")
        arr.append(preds)
    return np.concatenate(arr, axis=1)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 14 14:34:57 2021

@author: Marieke_Wesselkamp
"""
import random

import numpy as np
import pandas as pd

import preprocessing
import training

#%%
# Full data set: forcings (X), observed GPP (Y) and PRELES-simulated GPP.
X, Y, Y_Preles = preprocessing.preprocessing()
# Two-year subset used as training period P1.
X_P1, Y_P1, Y_Preles_P1 = preprocessing.split_by_year(X, Y, Y_Preles,
                                                      years=[2000, 2001])


#%%
def architecture_searchspace(input_size, output_size, gridsize, max_layers=3):
    """Sample up to `gridsize` unique MLP layer-size configurations.

    Each candidate is [input_size, h1, ..., hk, output_size] with
    1 <= k <= max_layers hidden layers drawn from {4, 8, 16, 32}.
    Duplicates are skipped, so fewer than `gridsize` candidates may be
    returned.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int
        Number of outputs.
    gridsize : int
        Number of sampling attempts.
    max_layers : int
        Maximum number of hidden layers (default 3).

    Returns
    -------
    list[list[int]]
        Unique sampled architectures.
    """
    grid = []
    for _ in range(gridsize):
        layersizes = [input_size]
        nlayers = random.randint(1, max_layers)
        # Separate loop variable: the original reused `i`, shadowing the
        # outer loop index.
        for _ in range(nlayers):
            layersizes.append(random.choice([4, 8, 16, 32]))
        layersizes.append(output_size)
        if layersizes not in grid:
            grid.append(layersizes)
    # BUG FIX: the original built `grid` but never returned it.
    return grid
def fit_by_year():
    """Sliding two-year train/evaluate experiment over 2000-2012.

    For each window, trains four models — data source D1 (observed GPP) or
    D2 (PRELES GPP), fitted on period P1 (years i, i+1) or P2 (years i+2,
    i+3) — and evaluates all four on P2. Writes the MAE/NSE summary to
    results/fit_by_year.xlsx and .csv.

    NOTE(review): relies on module-level globals X, Y, Y_Preles, hparams
    and layersizes being defined before the call — confirm at the call site.

    Returns
    -------
    pd.DataFrame
        One row per window with mean MAE and NSE for each model/data pair.
    """
    hparams_setting = {
        "epochs": 500,
        "batchsize": hparams[1],
        "learningrate": hparams[0],
        "history": 1,
    }
    model_design = {"layer_sizes": layersizes}
    # The first three years are repeated so the last windows wrap around.
    years = list(range(2000, 2013)) + [2000, 2001, 2002]
    columns = [
        "eval_year",
        "mae_d1m1", "mae_d1m2", "mae_d2m1", "mae_d2m2",
        "nse_d1m1", "nse_d1m2", "nse_d2m1", "nse_d2m2",
    ]
    rows = []
    for i in range(len(years) - 3):
        X_P1, Y_P1, Y_Preles_P1 = preprocessing.split_by_year(
            X, Y, Y_Preles, years=[years[i], years[i + 1]])
        X_P2, Y_P2, Y_Preles_P2 = preprocessing.split_by_year(
            X, Y, Y_Preles, years=[years[i + 2], years[i + 3]])

        # Fit all four model/data combinations; train() persists each model
        # under the given id, so only the side effect matters here.
        training.train(hparams_setting, model_design,
                       X_P1.to_numpy(), Y_P1.to_numpy(), "D1P1")
        training.train(hparams_setting, model_design,
                       X_P2.to_numpy(), Y_P2.to_numpy(), "D1P2")
        training.train(hparams_setting, model_design,
                       X_P1.to_numpy(), Y_Preles_P1.to_numpy(), "D2P1")
        training.train(hparams_setting, model_design,
                       X_P2.to_numpy(), Y_Preles_P2.to_numpy(), "D2P2")

        # Evaluate every model on the P2 window; predictions themselves
        # are not needed, only the error metrics.
        _, mae_d1m1, nse_d1m1 = prediction.predict(
            hparams_setting, model_design,
            X_P2.to_numpy(), Y_P2.to_numpy(), "D1P1")
        _, mae_d1m2, nse_d1m2 = prediction.predict(
            hparams_setting, model_design,
            X_P2.to_numpy(), Y_P2.to_numpy(), "D1P2")
        _, mae_d2m1, nse_d2m1 = prediction.predict(
            hparams_setting, model_design,
            X_P2.to_numpy(), Y_Preles_P2.to_numpy(), "D2P1")
        _, mae_d2m2, nse_d2m2 = prediction.predict(
            hparams_setting, model_design,
            X_P2.to_numpy(), Y_Preles_P2.to_numpy(), "D2P2")

        rows.append({
            # NOTE(review): evaluation happens on P2 (starts at years[i+2]),
            # but the label is years[i+1] — confirm the intended labeling.
            "eval_year": years[i + 1],
            "mae_d1m1": np.mean(mae_d1m1),
            "mae_d1m2": np.mean(mae_d1m2),
            "mae_d2m1": np.mean(mae_d2m1),
            "mae_d2m2": np.mean(mae_d2m2),
            "nse_d1m1": np.mean(nse_d1m1),
            "nse_d1m2": np.mean(nse_d1m2),
            "nse_d2m1": np.mean(nse_d2m1),
            "nse_d2m2": np.mean(nse_d2m2),
        })

    # Build the frame once from the collected rows: DataFrame.append was
    # deprecated in pandas 1.4 and removed in 2.0.
    df = pd.DataFrame(rows, columns=columns)
    df.to_excel(r"results/fit_by_year.xlsx")
    df.to_csv(r"results/fit_by_year.csv")
    return df
# Project modules (preprocessing, model training/prediction, search, plotting helpers).
import preprocessing
import visualizations
import utils
import training
import prediction
import random_search

# Third-party analysis dependencies.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from scipy.stats import spearmanr

#%%
# Full data set: forcings (X), observed GPP (Y) and PRELES-simulated GPP (Y_Preles).
X, Y, Y_Preles = preprocessing.preprocessing()

# Single-year splits ("s" suffix): 2001 and 2002 individually.
X_P1s, Y_P1s, Y_Preles_P1s = preprocessing.split_by_year(X, Y, Y_Preles, years=[2001])
X_P2s, Y_P2s, Y_Preles_P2s = preprocessing.split_by_year(X, Y, Y_Preles, years=[2002])

# Two-year periods: P1 = 2001-2002, P2 = 2003-2004.
X_P1, Y_P1, Y_Preles_P1 = preprocessing.split_by_year(X, Y, Y_Preles, years=[2001, 2002])
X_P2, Y_P2, Y_Preles_P2 = preprocessing.split_by_year(X, Y, Y_Preles, years=[2003, 2004])