def twoLevelCV_single_PCA(xIn, yIn, model, K1, K2):
    """Estimate the generalization error of one model via two-level CV on PCA features.

    The outer K-fold loop splits the data into a partition set and a test
    set. The inner K-fold loop splits each partition set into train and
    validation sets, fits ``model`` on the PCA-projected training data and
    records the validation MSE. The inner fold with the lowest validation
    MSE is then refit and scored on the outer test set. The mean of the
    outer test MSEs, rounded to 4 decimals, is returned.

    ``threshold`` and ``pcUsed`` are not parameters: they are read from the
    enclosing (module) scope and passed to ``pca_compute``. Both folds
    shuffle without a fixed seed, so results vary between calls.

    Input: (numpy array) xIn matrix, (numpy array) yIn matrix, (module) model,
            (int) K1: folds in outer loop, (int) K2: folds in inner loop
    Output: (numpy float) estimatedGenError

    ``model`` must expose ``fit(X, y)`` returning an object with
    ``predict(X)``; the same ``model`` object is refit on every fold.
    """
    CV_outer = model_selection.KFold(n_splits=K1, shuffle=True)
    CV_inner = model_selection.KFold(n_splits=K2, shuffle=True)

    def mse(y_true, y_pred):
        # Sum of squared residuals divided by the number of rows.
        return np.square(y_true - y_pred).sum() / y_true.shape[0]

    error_test = np.empty(K1)
    error_val = np.empty(K2)

    # Outer cross-validation loop. Performance Evaluation
    for k1, (par_index, test_index) in enumerate(CV_outer.split(xIn)):

        print("\nOuter Iteration {0}/{1} -----------".format(k1 + 1, K1))
        # Extract par and test set for current CV fold
        X_par = xIn[par_index, :]
        y_par = yIn[par_index]
        X_test = xIn[test_index, :]
        y_test = yIn[test_index]

        # Training data of every inner fold, kept so that the best fold's
        # model can be rebuilt after the inner loop.
        trainSetsX = []
        trainSetsY = []

        # Inner cross-validation loop. Model Selection
        for k2, (train_index, val_index) in enumerate(CV_inner.split(X_par)):

            print("\nInner Iteration {0}/{1}".format(k2 + 1, K2))

            # Extract train and validation set for current CV fold
            X_train = X_par[train_index, :]
            y_train = y_par[train_index]
            X_val = X_par[val_index, :]
            y_val = y_par[val_index]

            # Both X and y must be stored: the refit below indexes both lists.
            trainSetsX.append(X_train)
            trainSetsY.append(y_train)

            # One PCA call per fold; element 6 is used as the projection
            # matrix and element 2 as the projected training data.
            pca_out = pca_compute(X_train, X_train, threshold, pcUsed)
            V_D_temp = pca_out[6]
            X_train_PCA = pca_out[2]

            m = model.fit(X_train_PCA, y_train)

            # Project validation data into the training PCA space and keep
            # the first pcUsed components.
            X_val_temp = (X_val @ V_D_temp)[:, :pcUsed]

            error_val[k2] = mse(y_val, m.predict(X_val_temp))

            print("Validation error {0}:".format(np.round(error_val[k2], 4)))

        # Trace back the model according to its CV fold index
        best_fold = error_val.argmin()
        print("Inner CV fold of the best model for the last loop: {0}".format(
            best_fold + 1))

        X_temp = trainSetsX[best_fold]
        y_temp = trainSetsY[best_fold]

        # NOTE(review): one of the two original calls here passed y_temp as
        # the second argument; every other call passes the data matrix
        # twice, so that form is used. Confirm against pca_compute.
        pca_out = pca_compute(X_temp, X_temp, threshold, pcUsed)
        V_D_temp = pca_out[6]
        X_temp_PCA = pca_out[2]

        m = model.fit(X_temp_PCA, y_temp)

        # Project the outer test set with the selected fold's PCA basis.
        X_test_PCA = (X_test @ V_D_temp)[:, :pcUsed]

        error_test[k1] = mse(y_test, m.predict(X_test_PCA))

    estimatedGenError = np.round(np.mean(error_test, axis=0), 4)

    print("Estimated Generalization Error: {0}".format(estimatedGenError))

    return estimatedGenError
def twoLevelCV_compare_PCA(xIn, yIn, models, K1, K2):
    Input: (numpy array) xIn matrix, (numpy array) yIn matrix, (list) models, 
            (int) K1:folds in outer loop, (int) K2:folds in inner loop, 
            (list) lamda_range: range of values of lambda
    Output: (numpy array) estimatedGenError
    CV_outer = model_selection.KFold(n_splits=K1, shuffle=True)
    CV_inner = model_selection.KFold(n_splits=K2, shuffle=True)
    # Initialize variables
    error_test = np.empty((K1, len(models)))
    error_train = np.empty((K2, len(models)))
    error_val = np.empty((K2, len(models)))
    gen_error_models = np.empty((len(models), 1))
    best_models_idx = np.empty((1, len(models)))
    estimatedGenError = np.empty((1, len(models)))
    # Outer cross-validation loop. Performance Evaluation
    k1 = 0 
    for par_index, test_index in CV_outer.split(xIn):
        print("\nOuter Iteration {0}/{1} -----------".format(k1+1, K1))
        # extract par and test set for current CV fold
        X_par = xIn[par_index, :]
        y_par = yIn[par_index]
        X_test = xIn[test_index, :]
        y_test = yIn[test_index]
        # Inner cross-validation loop. Model Selection
        k2 = 0
        trainSetsX = []
        trainSetsY = []     
        for train_index, val_index in CV_inner.split(X_par):
            print("\nInner Iteration {0}/{1}".format(k2+1, K2))
            # Extract DEFAULT train and test set for current CV fold
            X_train = X_par[train_index, :]
            y_train = y_par[train_index]       
            X_val = X_par[val_index, :]
            y_val = y_par[val_index]
            trainSetsX.append(X_train) # To trace back optimal models
            for s, model in enumerate(models):
                # Determine specific process flow for the PCA model
                if s == 1:
                    # Extract projected data set and PCA space vector, fit the model
                    V_D_temp = pca_compute(X_train, X_train, threshold, pcUsed)[6]
                    X_train_PCA = pca_compute(X_train, X_train, threshold, pcUsed)[2]
                    m =, y_train)
                    # Project validation data into the training PCA space, compute MSE
                    X_val_temp = X_val @ V_D_temp
                    X_val_temp = X_val_temp[:, :pcUsed]
                    # Compute MSEs
                    error_train[k2, s] = np.square( y_train - m.predict(X_train_PCA) ).sum() / y_train.shape[0]
                    error_val[k2, s] = np.square( y_val - m.predict(X_val_temp) ).sum() / y_val.shape[0]
                    print("Validation error - Model {0}: {1}".format(s+1, np.round(error_val[k2, s], 4) ))
                    m =, y_train)
                    # Compute MSE
                    error_train[k2, s] = np.square( y_train - m.predict(X_train) ).sum() / y_train.shape[0]
                    error_val[k2, s] = np.square( y_val - m.predict(X_val) ).sum() / y_val.shape[0]
                    print("Validation error - Model {0}: {1}".format(s+1, np.round(error_val[k2, s], 4) ))
            k2 += 1
        # STILL ToDo
        # ---------------------------------
        # Train the model on D_par & compute the error
        # ---------------------------------

        for s, model in enumerate(models): 
            # Find the CV index of optimal model
            best_models_idx[0, s] = error_val[:, s].argmin()
            print("Inner CV fold of the best model {0} (last loop): {1}".format(s, best_models_idx[0, s]+1))
            # Determine specific process flow for the PCA model
            if s == 1:
                # Extract projected data set and PCA space vector, fit the model
                X_temp = trainSetsX[int(best_models_idx[0, s])]
                y_temp = trainSetsY[int(best_models_idx[0, s])]
                V_D_temp = pca_compute(X_temp, y_temp, threshold, pcUsed)[6]
                X_temp = pca_compute(X_temp, X_temp, threshold, pcUsed)[2]
                m =, y_temp)
                X_test_PCA = X_test @ V_D_temp
                X_test_PCA = X_test_PCA[:, :pcUsed]
                error_test[k1, s] = np.square( y_test - m.predict(X_test_PCA) ).sum()/y_test.shape[0]
                # Trace back the model according to its CV fold index
                m =[int(best_models_idx[0, s])], trainSetsY[int(best_models_idx[0, s])])
                # Compute MSE
                error_test[k1, s] = np.square( y_test - m.predict(X_test) ).sum()/y_test.shape[0]