Example #1
def settings(model, epochs, data_dir, sims=True):

    if sims:
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                rf"python\outputs\grid_search\simulations\grid_search_results_{model}2_adaptPool.csv"
            ))
    else:
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                rf"python\outputs\grid_search\observations\mlp\grid_search_results_{model}2.csv"
            ))

    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    dimensions = literal_eval(setup["hiddensize"])
    dimensions.append(1)  # adds the output dimension!

    if sims:
        featuresize = setup["featuresize"]
    else:
        featuresize = None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": epochs,
        "history": int(setup["history"]),
        "hiddensize": literal_eval(setup["hiddensize"]),
        "learningrate": setup["learningrate"]
    }

    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }

    X, Y = preprocessing.get_splits(
        sites=['hyytiala'],
        years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
        datadir=os.path.join(data_dir, "data"),
        dataset="profound",
        simulations=None)
    X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    return hparams, model_design, X, Y, X_test, Y_test
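
# Hypothetical usage sketch: load the best grid-search configuration for an
# MLP pretrained on simulations; "mlp", epochs=500 and the path below are
# placeholder arguments, not values from the original example.
hparams, model_design, X, Y, X_test, Y_test = settings(
    "mlp", epochs=500, data_dir=r"C:\path\to\DomAdapt", sims=True)
print(hparams["hiddensize"], model_design["dimensions"])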
Example #2
def preles_errors(
        site,
        data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):

    X_test, Y_test = preprocessing.get_splits(sites=[site],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    prelesGPP_def = pd.read_csv(
        os.path.join(data_dir, "data", "profound", f"output{site}2008def"),
        sep=";")
    prelesGPP_calib = pd.read_csv(
        os.path.join(data_dir, "data", "profound", f"output{site}2008calib"),
        sep=";")

    # The "train"/"val" slots hold the errors of the default vs. calibrated
    # PRELES runs; both are evaluated on the 2008 test year.
    rmse_train = utils.rmse(Y_test, prelesGPP_def)[0]
    rmse_val = utils.rmse(Y_test, prelesGPP_calib)[0]
    mae_train = metrics.mean_absolute_error(Y_test, prelesGPP_def)
    mae_val = metrics.mean_absolute_error(Y_test, prelesGPP_calib)

    errors = [rmse_train, rmse_val, mae_train, mae_val]

    return errors
Example #3
def plot3d(sparse=False):
    
    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
    Y_preds = np.load(r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\sparse\models\mlp8\sparse1\setting1\y_preds.npy", allow_pickle=True)
    
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
Example #4
def plot3e(sparse=False):
    
    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
    Y_preds = np.load(r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\lstm0\y_preds.npy", allow_pickle=True)
    
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
    
    mae = metrics.mean_absolute_error(Y[:Y_preds.shape[1]], np.mean(Y_preds, 0))
    plt.text(10,10, f"MAE = {np.round(mae, 4)}")
Example #5
def plot3f(years=[2001, 2002, 2003, 2004, 2005, 2006, 2007]):
    
    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
    
    predictions_test, errors = finetuning.featureExtractorC("mlp", 10, None, 50,
                      years = years)
    Y_preds = np.array(predictions_test)
    
    visualizations.plot_prediction(Y, Y_preds, "Hyytiälä (2008)")
    plt.legend(loc="upper right")
    
    mae = metrics.mean_absolute_error(Y, np.mean(Y_preds, 0))
    plt.text(10,10, f"MAE = {np.round(mae, 4)}")
Example #6
def plot3a():
    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)

    #Y_preles = pd.read_csv(os.path.join(data_dir ,r"data\profound\outputhyytiala2008def"), sep=";")
    #Y_preles_calib = pd.read_csv(os.path.join(data_dir ,r"data\profound\outputhyytiala2008calib"), sep=";")

    fig, ax = plt.subplots(figsize=(7,7))
    fig.suptitle("Hyytiälä (2008)")
    ax.plot(Y, color="green", label="Ground Truth", marker = "o", linewidth=0.8, alpha=0.9, markerfacecolor='green', markersize=4)
    #ax.plot(Y_preles, color="blue", label="PRELES \nPredictions", marker = "", alpha=0.5)
    #ax.plot(Y_preles_calib, color="green", label="PRELES \nPredictions", marker = "", alpha=0.5)
    ax.set(xlabel="Day of Year", ylabel="GPP [g C m$^{-2}$ day$^{-1}$]")
    plt.legend()
Example #7
def plot4(w, model, years=[2001, 2002, 2003, 2004, 2005, 2006, 2007]):
    
    def moving_average(x, w):
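        # Box filter: convolving with np.ones(w) sums each length-w window;
        # dividing by w yields window means ('valid' drops incomplete edges).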
        return np.convolve(x, np.ones(w), 'valid') / w

    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)
    Y_preles = pd.read_csv(os.path.join(data_dir ,r"data\profound\outputhyytiala2008calib"), sep=";")
    Y_nn = np.transpose(np.load(r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\mlp0\noPool\sigmoid\y_preds.npy", allow_pickle=True).squeeze(2))
    predictions_test, errors = finetuning.featureExtractorC("mlp", 10, None, 50,
                      years = years)
    Y_nn_f = np.transpose(np.array(predictions_test).squeeze(2))
    
    mt = moving_average(Y.squeeze(1), w)
    mp = moving_average(Y_preles.squeeze(1), w)
    mn = moving_average(np.mean(Y_nn, axis=1), w)
    mnf = moving_average(np.mean(Y_nn_f, axis=1), w)
    
    plt.figure(num=None, figsize=(7, 7), facecolor='w', edgecolor='k')
    plt.plot(mt, label="Groundtruth", color="lightgrey")
    if model=="preles":
        plt.plot(mp, label="PRELES \npredictions", color="green")
        maep = metrics.mean_absolute_error(mt, mp)
        plt.text(10,9, f"MAE = {np.round(maep, 4)}")
    elif model=="mlp0":
        plt.plot(mn, label="MLP \npredictions", color="green")
        maen = metrics.mean_absolute_error(mt, mn)
        plt.text(10,9, f"MAE = {np.round(maen, 4)}")
    elif model=="mlp10":
        plt.plot(mnf, label="Finetuned MLP \npredictions", color="green")
        maen = metrics.mean_absolute_error(mt, mnf)
        plt.text(10,9, f"MAE = {np.round(maen, 4)}")
    plt.xlabel("Day of Year")
    plt.ylabel("Average GPP over 7 days [g C m$^{-2}$ day$^{-1}$]")
    plt.legend()
Example #8
import setup.preprocessing as preprocessing
from setup.dev_mlp import _selection_parallel

import pandas as pd
import time
import os.path
import multiprocessing as mp
import itertools
import torch.nn as nn

#%% Load Data
data_dir = r"/home/fr/fr_fr/fr_mw263"

X, Y = preprocessing.get_splits(sites=['bily_kriz', 'soro', 'collelongo'],
                                years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
                                datadir=os.path.join(data_dir, "scripts/data"),
                                dataset="profound",
                                simulations=None)

X_test, Y_test = preprocessing.get_splits(sites=['bily_kriz', 'soro', 'collelongo'],
                                          years=[2008],
                                          datadir=os.path.join(data_dir, "scripts/data"),
                                          dataset="profound",
                                          simulations=None)

#%% Grid search of hparams
hiddensize = [16, 64, 128, 256]
batchsize = [16, 64, 128, 256]
learningrate = [1e-5, 1e-4, 1e-3, 5e-3, 1e-2]
history = [0, 1, 2]
activation = [nn.ReLU]
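
# The snippet breaks off before the search itself. A minimal sketch of how the
# lists above could be expanded into a full grid with itertools.product; the
# actual call into _selection_parallel is project-specific and not shown.
search_space = list(itertools.product(hiddensize, batchsize, learningrate,
                                      history, activation))
print(f"grid size: {len(search_space)} candidate configurations")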
Example #9
import sys
sys.path.append(
    r'OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python')

import finetuning
import setup.preprocessing as preprocessing
import visualizations

import os.path
import numpy as np

#%% Load Data: Profound in and out.
datadir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                years=[2001, 2002, 2003, 2004, 2005, 2006],
                                datadir=os.path.join(datadir, "data"),
                                dataset="profound",
                                simulations=None)

#%%
pretrained_model = visualizations.losses("mlp", 7, "")

running_losses, performance, y_tests, y_preds = finetuning.finetune(
    X, Y, epochs=100, model="mlp", pretrained_type=7)
#%%
visualizations.plot_running_losses(running_losses["mae_train"],
                                   running_losses["mae_val"], "", "mlp")
print(np.mean(np.array(performance), axis=0))

res_mlp = visualizations.losses("mlp", 0, "")
Example #10
"""
@author: marie
"""

import setup.preprocessing as preprocessing
import os.path

import multiprocessing as mp
import pandas as pd
import numpy as np

#%% Load Data: Profound in and out.
data_dir = r"/home/fr/fr_fr/fr_mw263"

X, Y = preprocessing.get_splits(
    sites=['hyytiala'],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
    datadir=os.path.join(data_dir, "scripts/data"),
    dataset="profound",
    simulations=None)

X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                          years=[2008],
                                          datadir=os.path.join(
                                              data_dir, "scripts/data"),
                                          dataset="profound",
                                          simulations=None)


def subset_data(data, perc):

    n_subset = int(np.floor(data.shape[0] / 100 * perc))
    subset = data[:n_subset, :]

    return subset
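
# Hypothetical usage: keep the first 30% of the training days.
X_sub = subset_data(X, 30)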
Example #11
#%% Set working directory
import setup.preprocessing as preprocessing
from setup.dev_mlp import _selection_parallel

import pandas as pd
import time
import os.path
import multiprocessing as mp
import itertools
import torch.nn as nn

#%% Load Data
data_dir = r"/home/fr/fr_fr/fr_mw263"
X, Y = preprocessing.get_splits(sites = ["bily_kriz"], 
                                years = [2005, 2006],
                                datadir = os.path.join(data_dir, "scripts/data"), 
                                dataset = "profound",
                                simulations = None)

X_test, Y_test = preprocessing.get_splits(sites=['bily_kriz'],
                                          years=[2008],
                                          datadir=os.path.join(data_dir, "scripts/data"),
                                          dataset="profound",
                                          simulations=None)

#%% Grid search of hparams
hiddensize = [8, 16, 32, 64, 128, 256]
batchsize = [8, 16, 32, 64, 128, 256]
learningrate = [1e-4, 1e-3, 5e-3, 1e-2]
history = [0, 1, 2]
n_layers = [1, 2, 3]
Example #12
import setup.models as models
from ast import literal_eval
import torch.nn as nn
import torch
import setup.preprocessing as preprocessing
import setup.dev_mlp as dev_mlp
import setup.utils as utils
import collect_results
import finetuning
from sklearn import metrics
import os.path

data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"

X_train, Y_train = preprocessing.get_splits(
    sites=['hyytiala'],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
    datadir=os.path.join(data_dir, "data"),
    dataset="profound",
    simulations=None)

X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                          years=[2008],
                                          datadir=os.path.join(
                                              data_dir, "data"),
                                          dataset="profound",
                                          simulations=None)

#%% Number of Network Parameters
#mods = [5,7,10,12,13,14]
mods = [0, 4, 5]
dummies = False
for mod in mods:
Example #13
"""
@author: marie
"""

import setup.preprocessing as preprocessing
import setup.models as models
import finetuning
import os.path
import numpy as np
import pandas as pd
import torch

data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
#%%
X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                          years=[2001, 2002, 2003, 2004, 2005, 2006, 2008],
                                          datadir=os.path.join(data_dir, "data"),
                                          dataset="profound",
                                          simulations=None,
                                          to_numpy=False)

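# Sensitivity grid: sweep TAir over its observed range in steps of 0.01 while
# all other drivers are held fixed at their test-set means.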
df_new = pd.DataFrame({"PAR": X_test["PAR"].mean(),
                       "TAir": np.arange(X_test["TAir"].min(), X_test["TAir"].max(), step=0.01),
                       "VPD": X_test["VPD"].mean(),
                       "Precip": X_test["Precip"].mean(),
                       "fAPAR": X_test["fAPAR"].mean(),
                       "DOY_sin": X_test["DOY_sin"].mean(),
                       "DOY_cos": X_test["DOY_cos"].mean()})
df_new.to_csv(os.path.join(data_dir, r"data\post_analysis\df1.csv"), sep=",")

df = df_new.to_numpy()

#%%
Example #14
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os.path
import setup.models as models
import numpy as np
import setup.utils as utils
import setup.preprocessing as preprocessing
import matplotlib.pyplot as plt
import random

#%%
datadir = r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
X, Y = preprocessing.get_splits(
    sites=['le_bray'],
    years=[2001],
    datadir=os.path.join(datadir, "data"),
    dataset="profound",
    simulations=None,
    colnames=["PAR", "TAir", "VPD", "Precip", "fAPAR", "DOY_sin", "DOY_cos"],
    to_numpy=True)

#%% Train

X = utils.minmax_scaler(X)
X = torch.tensor(X).type(dtype=torch.float)
Y = torch.tensor(Y).type(dtype=torch.float)

#model = models.MLP([X.shape[1],12,1], nn.ReLU)
#model = models.LSTM(X.shape[1], 12, 1, 10, F.relu)

x, target = utils.create_batches(X, Y, 128, 0)
#x_test, target_test = utils.create_batches(X, Y, 128, 0)
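
# Hypothetical continuation: one optimisation step, reusing the models.MLP
# signature from the commented-out line above on the scaled tensors X and Y.
model = models.MLP([X.shape[1], 12, 1], nn.ReLU)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

optimizer.zero_grad()
loss = criterion(model(X), Y)
loss.backward()
optimizer.step()
print(f"MSE after one step: {loss.item():.4f}")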
Example #15
def plot5(model, w=None, years=[2001, 2002, 2003, 2004, 2005, 2006, 2007]):
    
    def moving_average(x, w):
        return np.convolve(x, np.ones(w), 'valid') / w
    
    data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
    X, Y = preprocessing.get_splits(sites = ['hyytiala'],   
                                years = [2008],
                                datadir = os.path.join(data_dir, "data"), 
                                dataset = "profound",
                                simulations = None)

    Y_preles = pd.read_csv(os.path.join(data_dir ,r"data\profound\outputhyytiala2008calib"), sep=";")
    Y_nn = np.transpose(np.load(r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt\python\outputs\models\mlp0\noPool\sigmoid\y_preds.npy", allow_pickle=True).squeeze(2))
    predictions_test, errors = finetuning.featureExtractorC("mlp", 10, None, 50,
                      years = years)
    Y_nn_f = np.transpose(np.array(predictions_test).squeeze(2))
    
    if w is not None:
        Y = moving_average(Y.squeeze(1), w)
        Y_preles = moving_average(Y_preles.squeeze(1), w)
        Y_nn = moving_average(np.mean(Y_nn, axis=1), w)
        Y_nn_f = moving_average(np.mean(Y_nn_f, axis=1), w)
    else:
        Y = Y.squeeze(1)
        Y_preles = Y_preles.squeeze(1)
        Y_nn = np.mean(Y_nn, axis=1)
        Y_nn_f = np.mean(Y_nn_f, axis=1)

    plt.figure(num=None, figsize=(7, 7), facecolor='w', edgecolor='k')
    if model == "preles":
        plt.scatter(Y_preles, Y, color="darkblue")
        # Fit with polyfit
        b, m = polyfit(Y_preles, Y,  1)
        r2_p = rsquared(Y_preles, Y,  1)["determination"]
        plt.plot(Y_preles, b + m * Y_preles, '-', color="darkred", label = "y = a + b $\hat{y}$ ")
        maep = metrics.mean_absolute_error(Y, Y_preles)
        plt.text(0,10, f"MAE = {np.round(maep, 4)}")
        plt.text(0,9, f"R$^2$ = {np.round(r2_p, 4)}")
    elif model == "mlp0":
        plt.scatter(Y_nn, Y, color="darkblue")
        # Fit with polyfit
        b, m = polyfit(Y_nn, Y, 1)
        r2_nn = rsquared(Y_nn, Y,  1)["determination"]
        plt.plot(Y_nn, b + m *Y_nn, '-', color="darkred", label = "y = a + b $\hat{y}$ ")
        maen = metrics.mean_absolute_error(Y, Y_nn)
        plt.text(0,10, f"MAE = {np.round(maen, 4)}")
        plt.text(0,9, f"R$^2$ = {np.round(r2_nn, 4)}")
    elif model == "mlp10":
        plt.scatter(Y_nn_f, Y, color="darkblue")
        b, m = polyfit(Y_nn_f, Y, 1)
        r2_nnf = rsquared(Y_nn_f, Y,  1)["determination"]
        plt.plot(Y_nn_f, b + m * Y_nn_f, '-', color="darkred", label = "y = a + b $\hat{y}$ ")
        maenf = metrics.mean_absolute_error(Y, Y_nn_f)
        plt.text(0,10, f"MAE = {np.round(maenf, 4)}")
        plt.text(0,9, f"R$^2$ = {np.round(r2_nnf, 4)}")
    
    plt.plot(np.arange(11), 0 + 1 *np.arange(11), '--', color="gray", label = "y = $\hat{y}$")
    plt.xlim((-1,11))
    plt.ylim((-1,11))
    plt.ylabel("True GPP Test [g C m$^{-2}$ day$^{-1}$]")
    plt.xlabel("Estimated GPP Test [g C m$^{-2}$ day$^{-1}$]")
    
    plt.legend(loc="lower right")
Example #16
import os.path
import setup.dev_cnn as dev_cnn
import setup.dev_mlp as dev_mlp
import setup.dev_lstm as dev_lstm
import torch
import torch.nn as nn
import setup.preprocessing as preprocessing
import visualizations
import numpy as np
#%%
data_dir = "OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"
#%%
X, Y = preprocessing.get_splits(sites=['le_bray'],
                                years=[2001, 2003],
                                datadir=os.path.join(data_dir, "data"),
                                dataset="profound",
                                simulations=None)

#%%
X_sims, Y_sims = preprocessing.get_simulations(
    data_dir=os.path.join(data_dir, r"data\simulations\uniform_params"),
    drop_parameters=True)

#%%
hparams = {
    "batchsize": 256,
    "epochs": 1000,
    "history": 7,
    "hiddensize": 128,
    "learningrate": 0.01
}
Example #17
def train_network(model,
                  typ,
                  site,
                  epochs,
                  q,
                  adaptive_pooling,
                  dropout_prob,
                  dropout,
                  sparse=None,
                  traindata_perc=None,
                  save=True,
                  data_dir=r"/home/fr/fr_fr/fr_mw263"):
    """
    Takes the best found model parameters and trains a MLP with it.
    
    Args:
        X, Y (numpy array): Feature and Target data. \n
        model_params (dict): dictionary containing all required model parameters. \n
        epochs (int): epochs to train the model. \n
        splits (int): How many splits will be used in the CV. \n
        eval_set (numpy array): if provided, used for model evaluation. Default to None.
        
    Returns:
        running_losses: epoch-wise training and validation errors (rmse and mae) per split.\n
        y_tests: Target test set on which the model was evaluated on per split.\n
        y_preds: Network predictions per split.\n
        performance (pd.DataFrame): Data frame of model parameters and final training and validation errors.\n
    """
    X, Y = preprocessing.get_splits(
        sites=[site],
        years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
        datadir=os.path.join(data_dir, "scripts/data"),
        dataset="profound",
        simulations=None)

    if sparse is not None:
        ind = np.random.choice(X.shape[0],
                               int(np.floor(X.shape[0] / 100 * sparse)),
                               replace=False)
        X, Y = X[ind], Y[ind]

    X_test, Y_test = preprocessing.get_splits(sites=[site],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "scripts/data"),
                                              dataset="profound",
                                              simulations=None)

    eval_set = {"X_test": X_test, "Y_test": Y_test}

    hparams, model_design = set_model_parameters(model, typ, epochs,
                                                 adaptive_pooling, X, Y)

    start = time.time()

    data_dir = os.path.join(data_dir, f"output/models/{model}{typ}")

    data_dir = os.path.join(data_dir, f"relu")

    if not sparse is None:
        data_dir = os.path.join(data_dir, f"sparse//{sparse}")

    dev = __import__(f"setup.dev_{model}", fromlist=["selected"])

    running_losses, performance, y_tests, y_preds = dev.train_model_CV(
        hparams, model_design, X, Y, eval_set, dropout_prob, dropout, data_dir,
        save)
    end = time.time()

    # performance returns: rmse_train, rmse_test, mae_train, mae_test in this order.
    performance = np.mean(np.array(performance), axis=0)
    rets = [(end - start), hparams["hiddensize"], hparams["batchsize"],
            hparams["learningrate"], hparams["history"],
            model_design["activation"], performance[0], performance[1],
            performance[2], performance[3]]
    results = pd.DataFrame([rets],
                           columns=[
                               "execution_time", "hiddensize", "batchsize",
                               "learningrate", "history", "activation",
                               "rmse_train", "rmse_val", "mae_train", "mae_val"
                           ])
    results.to_csv(os.path.join(data_dir, "selected_results.csv"),
                   index=False)

    # Save: Running losses, ytests and ypreds.
    np.save(os.path.join(data_dir, "running_losses.npy"), running_losses)
    np.save(os.path.join(data_dir, "y_tests.npy"), y_tests)
    np.save(os.path.join(data_dir, "y_preds.npy"), y_preds)
    if sparse is not None:
        np.save(os.path.join(data_dir, "ind.npy"), ind)
Example #18
def settings(typ,
             epochs,
             data_dir,
             dummies,
             sparse=None,
             years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
             random_days=None):

    X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=years,
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    if dummies:

        Xf = np.zeros((X.shape[0], 12))
        Xf_test = np.zeros((X_test.shape[0], 12))
        Xf[:, :7] = X
        X = Xf
        Xf_test[:, :7] = X_test
        X_test = Xf_test

    if random_days is not None:
        ind = np.random.choice(X.shape[0], random_days)
        X, Y = X[ind], Y[ind]
    if sparse is not None:
        ind = np.load(
            os.path.join(
                data_dir,
                rf"python\outputs\models\mlp{typ}\relu\sparse\{sparse}\ind.npy"
            ))
        X, Y = X[ind], Y[ind]

    if typ in (6, 8):
        #gridsearch_results = pd.read_csv(os.path.join(data_dir, f"python\outputs\grid_search\simulations\grid_search_results_{model}2_adaptPool.csv"))
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\simulations\7features\grid_search_results_mlp2_np.csv"
            ))
    elif typ in (0, 9, 10, 13):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
    elif typ in (4, 11, 12, 14):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
        gridsearch_results = gridsearch_results[(
            gridsearch_results.nlayers == 3)].reset_index()
    elif typ in (5, 7):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\AdaptPool\7features\grid_search_results_mlp2.csv"
            ))

    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    dimensions = [X.shape[1]]
    for dim in literal_eval(setup["hiddensize"]):
        dimensions.append(dim)
    dimensions.append(Y.shape[1])

    if typ in (5, 6, 7, 8):
        featuresize = setup["featuresize"]
    else:
        featuresize = None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": epochs,
        "history": int(setup["history"]),
        "hiddensize": literal_eval(setup["hiddensize"]),
        "learningrate": setup["learningrate"]
    }

    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }

    return hparams, model_design, X, Y, X_test, Y_test
Example #19
import setup.preprocessing as preprocessing

import pandas as pd
import time
import os.path

from setup.dev_rf import rf_selection_parallel
import multiprocessing as mp
import itertools
import setup.utils as utils

#%% Load Data
data_dir = r"/home/fr/fr_fr/fr_mw263/scripts"
X, Y = preprocessing.get_splits(
    sites=["le_bray"],
    years=[2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    datadir=os.path.join(data_dir, "data"),
    dataset="profound",
    simulations=None)

#%%
cv_splits = [6]
shuffled = [False]
n_trees = [200, 300, 400, 500]
depth = [4, 5, 6, 7]
eval_set = None

p_list = utils.expandgrid(cv_splits, shuffled, n_trees, depth)

searchsize = len(p_list[0])

if __name__ == '__main__':
Example #20
def settings(
        typ,
        years=[2001, 2002, 2003, 2004, 2005, 2006, 2007],
        data_dir=r"OneDrive\Dokumente\Sc_Master\Masterthesis\Project\DomAdapt"):

    X, Y = preprocessing.get_splits(sites=['hyytiala'],
                                    years=years,
                                    datadir=os.path.join(data_dir, "data"),
                                    dataset="profound",
                                    simulations=None)
    X_test, Y_test = preprocessing.get_splits(sites=['hyytiala'],
                                              years=[2008],
                                              datadir=os.path.join(
                                                  data_dir, "data"),
                                              dataset="profound",
                                              simulations=None)

    if typ in (5, 13, 14):

        Xf = np.zeros((X.shape[0], 12))
        Xf_test = np.zeros((X_test.shape[0], 12))
        Xf[:, :7] = X
        X = Xf
        Xf_test[:, :7] = X_test
        X_test = Xf_test

    if typ in (6, 8):
        #gridsearch_results = pd.read_csv(os.path.join(data_dir, f"python\outputs\grid_search\simulations\grid_search_results_{model}2_adaptPool.csv"))
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\simulations\7features\grid_search_results_mlp2_np.csv"
            ))
    elif typ in (0, 9, 10, 13):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
    elif typ in (4, 11, 12, 14):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\grid_search_results_mlp2.csv"
            ))
        gridsearch_results = gridsearch_results[(
            gridsearch_results.nlayers == 3)].reset_index()
    elif typ in (5, 7):
        gridsearch_results = pd.read_csv(
            os.path.join(
                data_dir,
                r"python\outputs\grid_search\observations\mlp\AdaptPool\7features\grid_search_results_mlp2.csv"
            ))

    setup = gridsearch_results.iloc[
        gridsearch_results['mae_val'].idxmin()].to_dict()

    dimensions = [X.shape[1]]
    for dim in literal_eval(setup["hiddensize"]):
        dimensions.append(dim)
    dimensions.append(Y.shape[1])

    if typ in (5, 6, 7, 8):
        featuresize = setup["featuresize"]
    else:
        featuresize = None

    hparams = {
        "batchsize": int(setup["batchsize"]),
        "epochs": None,
        "history": int(setup["history"]),
        "hiddensize": literal_eval(setup["hiddensize"]),
        "learningrate": setup["learningrate"]
    }

    model_design = {
        "dimensions": dimensions,
        "activation": nn.ReLU,
        "featuresize": featuresize
    }

    return hparams, model_design, X, Y, X_test, Y_test