Example 1
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# `f` is the project's helper module (FrankeFunction, X_make, MSE, OLS)


def sgdm(m, degrees, n_epochs, b, eta, noise=0, gamma=0):  # stochastic gradient descent with momentum
    np.random.seed(1337)
    x = np.random.rand(m,degrees) #+1?
    y = np.random.rand(m,degrees) #+1?

    X_mesh, Y_mesh = np.meshgrid(x, y)

    z = f.FrankeFunction(X_mesh, Y_mesh) + noise*np.random.randn(*X_mesh.shape)

    z = np.ravel(z)
    X = f.X_make(X_mesh, Y_mesh, degrees)  # polynomial design matrix

    # SPLIT AND SCALE
    X_tr, X_te, z_tr, z_te = train_test_split(X, z, test_size=0.3)

    # StandardScaler removes the mean and scales each feature to unit variance
    scaler = StandardScaler()
    scaler.fit(X_tr)               # mean and std computed on the training data only
    X_tr = scaler.transform(X_tr)  # standardize by centering and scaling
    X_te = scaler.transform(X_te)  # reuse the training statistics on the test data

    z_tr = z_tr.reshape(-1, 1)
    z_te = z_te.reshape(-1, 1)
    scaler.fit(z_tr)
    z_tr = scaler.transform(z_tr)
    z_te = scaler.transform(z_te)

    l = int((degrees+1)*(degrees+2)/2)  # number of columns in the design matrix
    beta = np.random.randn(l, 1)        # random initial coefficients
    n_train = X_tr.shape[0]
    batch_num = n_train // b            # number of minibatches
    if n_train % b:
        print('warning: batch size does not divide the training set evenly')

    v = 0
    mse_eval = np.zeros(n_epochs)
    index_array = np.arange(n_train)    # row indices of the training data
    for epoch in range(n_epochs):
        np.random.shuffle(index_array)
        for i in range(batch_num):
            batch_idx = index_array[i*b:(i+1)*b]  # rows of the i-th minibatch
            xi = X_tr[batch_idx]
            zi = z_tr[batch_idx]

            gradients = 2/b * xi.T @ (xi @ beta - zi)  # gradient of the MSE cost
            # eta could also follow a schedule, e.g. eta = learning_rate(epoch*batch_num + i)
            v = gamma*v + eta*gradients                # momentum update
            beta = beta - v
        z_eval = X_te.dot(beta)
        mse_eval[epoch] = f.MSE(z_te, z_eval)
    beta_ols = f.OLS(X_tr, z_tr)
    z_ols = X_te.dot(beta_ols)
    mse_beta = f.MSE(z_te, z_ols)
    return beta, mse_eval, mse_beta
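
A minimal, self-contained sketch of the same momentum update on synthetic linear data; the data, learning rate, and batch size below are illustrative assumptions, not taken from the example above.

import numpy as np

np.random.seed(0)
X = np.random.rand(100, 3)                       # synthetic design matrix
true_beta = np.array([[1.0], [-2.0], [0.5]])
z = X @ true_beta + 0.01*np.random.randn(100, 1)

beta = np.random.randn(3, 1)
v, gamma, eta, b = 0, 0.9, 0.05, 10
idx = np.arange(100)
for epoch in range(200):
    np.random.shuffle(idx)
    for i in range(100 // b):
        rows = idx[i*b:(i+1)*b]
        grad = 2/b * X[rows].T @ (X[rows] @ beta - z[rows])
        v = gamma*v + eta*grad                   # momentum accumulates past gradients
        beta = beta - v
print(beta.ravel())                              # should approach [1.0, -2.0, 0.5]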
Example 2
    def train(self, epochs, batch_size, x, y, activation, derivative,
              xvalidation, yvalidation, verbose=False):

        tmp = int(len(y) / batch_size)
        Niter = min(200, tmp)  # cap the number of minibatches per epoch
        indexes = np.arange(len(y))

        self.cost_val = list()
        self.cost_train = list()

        for i in range(epochs):
            for j in range(Niter):

                datapoints = np.random.choice(indexes,
                                              size=batch_size,
                                              replace=False)
                batch_x = x[datapoints, :]
                batch_y = y[datapoints]

                self.feed(batch_x, activation)
                self.back(batch_x, batch_y, derivative)

            pred_val = self.feed_out(xvalidation, activation)
            pred_train = self.feed_out(batch_x, activation)  # training loss on the last minibatch only

            if self.mode == 'regression':
                self.cost_val.append(
                    fx.MSE(pred_val.ravel(), yvalidation.ravel()))
                self.cost_train.append(
                    fx.MSE(pred_train.ravel(), batch_y.ravel()))

            if self.mode == 'classification':
                self.cost_val.append(
                    lrf.cost_log_ols(pred_val.ravel(), yvalidation.T))
                self.cost_train.append(
                    lrf.cost_log_ols(pred_train.ravel(), batch_y.T))

            if i > self.early_stop_nochange:
                # compare the recent 5-epoch average validation loss with the
                # average over the longer no-change window
                long_window = np.arange(i - self.early_stop_nochange, i)
                short_window = np.arange(i - 5, i)

                recent = np.mean(np.array(self.cost_val)[short_window])
                baseline = np.mean(np.array(self.cost_val)[long_window])
                if recent - baseline > -self.early_stop_tol:
                    break

            if verbose:
                print('Epoch', i + 1, 'loss', self.cost_val[i])
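
The early-stopping rule above, in isolation: stop once the recent average validation loss is no longer improving on the longer window. The window lengths and tolerance here are illustrative assumptions.

import numpy as np

def should_stop(cost_val, nochange=20, tol=1e-4):
    """Stop when the 5-epoch average loss stops improving on the longer window."""
    i = len(cost_val)
    if i <= nochange:
        return False
    recent = np.mean(cost_val[i-5:i])
    baseline = np.mean(cost_val[i-nochange:i])
    return recent - baseline > -tol

losses = [1.0/t for t in range(1, 40)] + [0.03]*30  # loss plateaus after ~40 epochs
for epoch in range(1, len(losses) + 1):
    if should_stop(losses[:epoch]):
        print('stopping at epoch', epoch)
        break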
Example 3
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
# `f` is the project's helper module (FrankeFunction, X_make, OLS, MSE)


def bootstrap(x, y, max_deg, boots_num):
    """
    Applies the bootstrap algorithm.

    Args:
        x, y (np.array): initial datapoints
        max_deg (int): maximum polynomial degree
        boots_num (int): number of bootstraps
    """
    np.random.seed(130)

    x, y = np.meshgrid(x, y)
    z = np.ravel(
        f.FrankeFunction(x, y) +
        0.5 * np.random.randn(np.shape(x)[0],
                              np.shape(y)[1]))

    MSE_degree_values = np.zeros(max_deg)
    MSE_test_degree_values = np.zeros(max_deg)
    MSE_train_values = np.zeros(boots_num)
    MSE_test_values = np.zeros(boots_num)
    for k, deg in enumerate(range(1, max_deg + 1)):
        # loop over polynomial degrees (k indexes the output arrays)
        X_design = f.X_make(x, y, deg)
        scaler = StandardScaler()

        X_tr, X_te, z_tr, z_te = train_test_split(X_design, z, test_size=0.2)
        scaler.fit(X_tr)

        X_train = scaler.transform(X_tr)
        X_test = scaler.transform(X_te)
        # scale AFTER the train/test split; otherwise test data would leak
        # into the scaling statistics
        index_array = np.arange(len(z_tr))

        for i in range(boots_num):
            # draw a fresh bootstrap sample each iteration; a fixed
            # random_state here would make every bootstrap identical
            indx = resample(index_array)
            z_bootstrap = z_tr[indx]

            z_test = X_test.dot(f.OLS(X_train[indx, :], z_bootstrap))
            z_train = X_train.dot(f.OLS(X_train[indx, :], z_bootstrap))
            MSE_train_values[i] = f.MSE(z_tr, z_train)
            MSE_test_values[i] = f.MSE(z_te, z_test)

        MSE_degree_values[k] = np.mean(MSE_train_values)
        MSE_test_degree_values[k] = np.mean(MSE_test_values)
    return MSE_degree_values, MSE_test_degree_values
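
A hedged usage sketch; the grid size, maximum degree, and bootstrap count are illustrative, and `f.X_make`/`f.OLS` are assumed to come from the project's helper module.

x = np.sort(np.random.uniform(0, 1, 20))
y = np.sort(np.random.uniform(0, 1, 20))
mse_train, mse_test = bootstrap(x, y, max_deg=10, boots_num=50)
# the train error typically keeps falling with degree, while the test error
# turns back up once the model starts to overfit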
Example 4
    def SGD(self, n_epochs, batch_size, gamma=0.9, lmbda=0):
        """Stochastic gradient descent.

        Keyword arguments:
        n_epochs -- number of epochs
        batch_size -- size of minibatch
        gamma -- momentum parameter (default = 0.9)
        lmbda -- regularization parameter (default = 0)

        Exception:
        Exception raised when batch size does not result in an equal division of
        training data.
        """
        n = self.X_train.shape[0]
        if n % batch_size:
            raise Exception("Batch number and dataset not compatible")
        n_batches = n // batch_size

        beta = np.random.randn(self.X_train.shape[1], 1)    # initialize beta

        v = 0
        self.mse_epochs = np.zeros(n_epochs)
        index_array = np.arange(n)
        for epoch in range(n_epochs):
            np.random.shuffle(index_array)
            X_minibatches = np.split(self.X_train[index_array], n_batches)
            z_minibatches = np.split(self.z_train[index_array], n_batches)

            i = 0
            for X_batch, z_batch in zip(X_minibatches, z_minibatches):
                # Calculate mean gradient of minibatch
                gradient = self.grad_cost_function(X_batch, z_batch, beta,
                                                   batch_size, lmbda)

                # Update beta
                eta = self.learning_rate(epoch*n + i)
                v = gamma*v + eta*gradient
                beta = beta - v
                i += 1

            z_tilde = self.X_test @ beta
            self.mse_epochs[epoch] = f.MSE(self.z_test, z_tilde)
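
The call self.learning_rate(epoch*n + i) suggests a time-decaying schedule. A common choice is the inverse-decay form below; the constants t0 and t1 are illustrative assumptions, not taken from the class.

def learning_rate(t, t0=5.0, t1=50.0):
    """Inverse time decay: larger steps early in training, smaller steps later."""
    return t0 / (t + t1)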
Example 5
def test_NN_MSE():
    # `pred`, `pred_sk` and `Y_test` are assumed to be set up at module level
    test_loss = fx.MSE(pred.ravel(), Y_test.T)
    test_loss_sk = mean_squared_error(Y_test.ravel(), pred_sk)

    assert abs(test_loss_sk - test_loss) < 1e-1
Example 6
import numpy as np
import sklearn.linear_model as skl
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
# `f` is the project's helper module (FrankeData, TerrainData, design_matrix,
# OLS, Ridge, MSE, R2Score)


def cross_validation(n,
                     maxdegree,
                     noise,
                     n_folds,
                     method=f.OLS,
                     seed=130,
                     lmbda=0,
                     datatype='Franke',
                     filename='SRTM_data_Minneapolis'):
    """
    cross_validation

    Input:
        n           -   number of datapoints before meshgrid
        maxdegree   -   max degree to iterate over
        noise       -   amount of noise
        n_folds     -   number of folds in cross validation
        method      -   regression method (OLS, Ridge, Lasso)
        seed        -   seed to random number generator
        lmbda       -   lambda value to use in Ridge and Lasso
        datatype    -   datatype to fit (Franke, Terrain)
        filename    -   file with terrain data

    Output:
        polydegree      -   array with model complexity
        MSE_mean        -   array with mean MSE from each cross validation
        MSE_best        -   MSE for the best fit
        R2Score_skl     -   array with R2Score for Scikit Learn cross validation
        R2Score_mean    -   array with mean R2Score from each cross validation
    """
    if n % n_folds != 0:
        raise Exception("Can't divide data set in n_folds equally sized folds")

    polydegree = np.zeros(maxdegree)
    MSE_mean = np.zeros(maxdegree)
    MSE_mean_sklearn = np.zeros(maxdegree)
    R2Score_mean = np.zeros(maxdegree)
    R2Score_skl = np.zeros(maxdegree)

    # Make data
    np.random.seed(int(seed))

    if datatype == 'Franke':
        x_train, x_test, y_train, y_test, z_train, z_test = f.FrankeData(
            n, noise, test_size=0.3)

    elif datatype == 'Terrain':
        x_train, x_test, y_train, y_test, z_train, z_test = f.TerrainData(
            n, filename)

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        # Create design matrix
        X_train = f.design_matrix(x_train, y_train, degree)

        # Shuffle data to get random folds
        index = np.arange(0, np.shape(X_train)[0], 1)
        np.random.seed(int(seed))
        np.random.shuffle(index)
        X_train_random = X_train[index, :]
        z_train_random = z_train[index]

        # Split data in n_folds folds
        X_folds = np.array(np.array_split(X_train_random, n_folds))
        z_folds = np.array(np.array_split(z_train_random, n_folds))

        # Scikit-Learn cross-validation for the chosen method (the three
        # methods previously repeated this whole block verbatim)
        if method == f.OLS:
            clf = skl.LinearRegression()
        elif method == f.Ridge:
            clf = skl.Ridge(alpha=lmbda)
        elif method == 'Lasso':
            clf = skl.Lasso(alpha=lmbda, fit_intercept=False)

        scores = cross_val_score(clf,
                                 X_train,
                                 z_train,
                                 cv=n_folds,
                                 scoring='neg_mean_squared_error')
        MSE_mean_sklearn[degree] = np.abs(np.mean(scores))
        best_degree_sklearn = np.argmin(MSE_mean_sklearn)

        # Fit to the held-out test data at the best Scikit-Learn degree so far
        X_train_best = f.design_matrix(x_train, y_train, best_degree_sklearn)
        scaler = StandardScaler()
        scaler.fit(X_train_best)
        X_train_best_scaled = scaler.transform(X_train_best)
        X_test_best = f.design_matrix(x_test, y_test, best_degree_sklearn)
        X_test_best_scaled = scaler.transform(X_test_best)

        # Restore the intercept column that StandardScaler zeroes out
        X_train_best_scaled[:, 0] = 1
        X_test_best_scaled[:, 0] = 1

        scaler.fit(z_train.reshape(-1, 1))
        z_train_scaled = scaler.transform(z_train.reshape(-1, 1))
        z_test_scaled = scaler.transform(z_test.reshape(-1, 1))

        beta_best_sklearn = f.OLS(X_train_best_scaled, z_train_scaled)

        # cross validation
        for k in range(n_folds):
            # Validation data
            X_val = X_folds[k]
            z_val = np.reshape(z_folds[k], (-1, 1))

            # Training data
            idx = np.ones(n_folds, dtype=bool)
            idx[k] = False
            X_train_fold = X_folds[idx]

            # Combine folds
            X_train_fold = np.reshape(
                X_train_fold, (X_train_fold.shape[0] * X_train_fold.shape[1],
                               X_train_fold.shape[2]))
            z_train_fold = np.reshape(np.ravel(z_folds[idx]), (-1, 1))

            # Scaling: StandardScaler removes the mean and scales each
            # feature to unit variance
            scaler = StandardScaler()
            scaler.fit(X_train_fold)  # mean and std from the training folds only
            X_train_fold_scaled = scaler.transform(X_train_fold)
            X_val_scaled = scaler.transform(X_val)
            # Set the first column to one, as StandardScaler sets it to zero
            X_train_fold_scaled[:, 0] = 1
            X_val_scaled[:, 0] = 1

            # scaler.fit(z_train_fold)
            # z_train_fold_scaled = scaler.transform(z_train_fold)
            # z_val_scaled = scaler.transform(z_val)
            z_train_fold_scaled = z_train_fold
            z_val_scaled = z_val

            # Choose method for calculating coefficients beta
            if method == f.OLS:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = X_val_scaled @ beta_fold
            elif method == f.Ridge:
                beta_fold = method(X_train_fold_scaled, z_train_fold_scaled,
                                   lmbda, degree)
                z_tilde_fold = X_val_scaled @ beta_fold

            elif method == 'Lasso':
                clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                    X_train_fold_scaled, z_train_fold_scaled)
                z_tilde_fold = clf_lasso.predict(X_val_scaled)

            MSE_mean[degree] += f.MSE(z_val_scaled, z_tilde_fold)
            R2Score_mean[degree] += f.R2Score(z_val_scaled, z_tilde_fold)

        MSE_mean[degree] /= n_folds
        R2Score_mean[degree] /= n_folds

        # # Cross-validation using Scikit-Learn
        # clf = skl.LinearRegression()
        # R2Score_skl[degree] = np.mean(cross_val_score(clf, X_train, z_train, scoring='r2', cv=n_folds))

    # Find the degree with the smallest mean MSE
    best_degree = np.argmin(MSE_mean)
    print('best degree:', best_degree)

    # Fit to the held-out test data at the best degree
    X_train_best = f.design_matrix(x_train, y_train, best_degree)
    scaler.fit(X_train_best)
    X_train_best_scaled = scaler.transform(X_train_best)
    X_test_best = f.design_matrix(x_test, y_test, best_degree)
    X_test_best_scaled = scaler.transform(X_test_best)

    X_train_best_scaled[:, 0] = 1
    X_test_best_scaled[:, 0] = 1

    scaler.fit(z_train.reshape(-1, 1))
    z_train_scaled = scaler.transform(z_train.reshape(-1, 1))
    z_test_scaled = scaler.transform(z_test.reshape(-1, 1))

    beta_best = f.OLS(X_train_best_scaled, z_train_scaled)
    z_tilde_best = X_test_best_scaled @ beta_best
    MSE_best = f.MSE(z_test_scaled, z_tilde_best)
    print('MSE on held-out test data:', MSE_best)

    return (polydegree, MSE_mean, MSE_best, R2Score_skl, R2Score_mean,
            beta_best, best_degree, MSE_mean_sklearn, best_degree_sklearn,
            beta_best_sklearn)
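
A hedged usage sketch; the argument values are illustrative.

(polydegree, MSE_mean, MSE_best, R2Score_skl, R2Score_mean, beta_best,
 best_degree, MSE_mean_sklearn, best_degree_sklearn,
 beta_best_sklearn) = cross_validation(n=20, maxdegree=12, noise=0.1,
                                       n_folds=5, method=f.OLS)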
Example 7
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import sklearn.linear_model as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# `f` is the project's helper module (FrankeFunction, design_matrix, OLS,
# Ridge, MSE, R2Score)


def no_resampling(n, maxdegree, noise, method=f.OLS, lmbda=0, seed=7053):
    # arrays for plotting of error
    polydegree = np.zeros(maxdegree)
    MSE_OLS = np.zeros(maxdegree)
    R2Score_OLS = np.zeros(maxdegree)
    MSE_test = np.zeros(maxdegree)
    MSE_train = np.zeros(maxdegree)
    MSE_train_scaled = np.zeros(maxdegree)
    MSE_test_scaled = np.zeros(maxdegree)
    R2Score_scaled = np.zeros(maxdegree)

    # Make data
    np.random.seed(seed)
    x = np.sort(np.random.uniform(0, 1, n))
    y = np.sort(np.random.uniform(0, 1, n))
    x, y = np.meshgrid(x, y)

    # Franke Function
    z = np.ravel(f.FrankeFunction(x, y) + noise * np.random.randn(n, n))

    for degree in range(0, maxdegree):
        polydegree[degree] = degree

        #Create design matrix
        X = f.design_matrix(x, y, degree)

        # Split in training and test data
        X_train, X_test, z_train, z_test = train_test_split(X,
                                                            z.reshape(-1, 1),
                                                            test_size=0.3)

        # OLS estimate train/test without scaled
        beta_OLS_train = f.OLS(X_train, z_train)
        ztilde_test = X_test @ beta_OLS_train
        ztilde_train = X_train @ beta_OLS_train

        MSE_train[degree] = f.MSE(z_train, ztilde_train)
        MSE_test[degree] = f.MSE(z_test, ztilde_test)

        # Scaling: StandardScaler removes the mean and scales each feature to
        # unit variance
        scaler = StandardScaler()
        scaler.fit(X_train)                        # mean and std from the training data
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)   # reuse the training statistics
        # z is left unscaled; the commented lines show the scaled variant
        # scaler.fit(z_train)
        # z_train_scaled = scaler.transform(z_train)
        # z_test_scaled = scaler.transform(z_test)
        z_train_scaled = z_train
        z_test_scaled = z_test

        # Set the first column to 1 since StandardScaler sets it to 0
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        if method == f.OLS:
            beta_train_scaled = method(X_train_scaled, z_train_scaled)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled

        elif method == f.Ridge:
            beta_train_scaled = method(X_train_scaled, z_train_scaled, lmbda,
                                       degree)
            z_tilde_test_scaled = X_test_scaled @ beta_train_scaled
            z_tilde_train_scaled = X_train_scaled @ beta_train_scaled

        elif method == 'Lasso':
            clf_lasso = skl.Lasso(alpha=lmbda, fit_intercept=False).fit(
                X_train_scaled, z_train_scaled)
            z_tilde_test_scaled = clf_lasso.predict(X_test_scaled)
            z_tilde_train_scaled = clf_lasso.predict(X_train_scaled)

        MSE_train_scaled[degree] = f.MSE(z_train_scaled, z_tilde_train_scaled)
        MSE_test_scaled[degree] = f.MSE(z_test_scaled, z_tilde_test_scaled)
        R2Score_scaled[degree] = f.R2Score(z_test_scaled, z_tilde_test_scaled)

    return polydegree, MSE_train, MSE_test, MSE_train_scaled, MSE_test_scaled, R2Score_scaled

    # NOTE: the code below is unreachable (it sits after the return) and
    # references `surf`, which is only defined in the commented-out line;
    # it is kept for reference.
    # fig = plt.figure()
    # ax = fig.add_subplot(projection='3d')
    # ztilde_plot = np.reshape(ztilde, (n, n))
    # surf = ax.plot_surface(x, y, ztilde_plot, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    # ax.set_zlim(-0.10, 1.40)
    # ax.zaxis.set_major_locator(LinearLocator(10))
    # ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    # fig.colorbar(surf, shrink=0.5, aspect=5)  # color bar mapping values to colors
    # plt.show()
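
A hedged usage sketch producing the usual train/test error curves; the argument values are illustrative.

polydegree, MSE_train, MSE_test, MSE_train_scaled, MSE_test_scaled, \
    R2Score_scaled = no_resampling(n=20, maxdegree=12, noise=0.1)

plt.plot(polydegree, MSE_train_scaled, label='train (scaled)')
plt.plot(polydegree, MSE_test_scaled, label='test (scaled)')
plt.xlabel('polynomial degree')
plt.ylabel('MSE')
plt.legend()
plt.show()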
Example 8
    # NOTE: fitting the scaler on the full X leaks test information into the
    # scaling; fitting on X_train alone would be safer
    scaler.fit(X)

    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print(f" z_train: {np.shape(z_train)}")
    print(f" X: {np.shape(X)}")
    print(f" X_train: {np.shape(X_train)}")
    z_test_pred = X_test_scaled.dot(
        f.Ridge_func(X_train_scaled, z_train, 1E-5))
    z_train_pred = X_train_scaled.dot(
        f.Ridge_func(X_train_scaled, z_train, 1E-5))
    print(f"x: {np.shape(x)}")
    print(f"y: {np.shape(y)}")

    MSEtrain[i] = f.MSE(z_train, z_train_pred)
    MSEtest[i] = f.MSE(z_test, z_test_pred)

plt.plot(deg, MSEtest, label="test")
plt.plot(deg, MSEtrain, label="train")
plt.legend()
plt.show()
"""
#optional plotting of surface
z_plot = np.reshape(z_, (25000, 25000))
#print(np.shape(z_))
# Plot the surface.

surf = ax.plot_surface(x, y, z_plot, cmap=cm.coolwarm, linewidth=0, antialiased=False)

# Customize the z axis.
Example 9
    def OLS(self):
        """Calculate the ordinary least squares fit and its mean squared error."""
        beta = f.OLS(self.X_train, self.z_train)
        z_tilde = self.X_test @ beta
        self.MSE = f.MSE(self.z_test, z_tilde)
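
f.OLS itself is not shown in these examples. A minimal sketch consistent with how it is called (design matrix and targets in, coefficient vector out) is the normal-equation solution below; this is a guess at its behavior, not the project's actual implementation.

import numpy as np

def OLS_sketch(X, z):
    # beta = (X^T X)^+ X^T z, using the pseudo-inverse for numerical safety
    return np.linalg.pinv(X.T @ X) @ X.T @ z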
Example 10
# Assumes: numpy as np, seaborn as sb, matplotlib.pyplot as plt, a `network`
# object, data X_train/z_train/X_test/z_test, and grids array_lambda/array_eta
mse_heatmap = np.zeros((len(array_lambda), len(array_eta)))
index_array = np.arange(len(X_train))
for i, lmbda in enumerate(array_lambda):
    for j, eta in enumerate(array_eta):
        network.create_layers(hidden_act, output_act, seed)
        for k in range(n_epochs):
            np.random.shuffle(index_array)
            X_minibatches = np.split(X_train[index_array], n_batches)
            z_minibatches = np.split(z_train[index_array], n_batches)

            for batch in range(n_batches):
                # eta = network.learning_rate(epoch*N + j, 2, 20)
                network.backprop(X_minibatches[batch], z_minibatches[batch],
                                 eta, lmbda)

        network.feedforward(X_test)
        mse_heatmap[i, j] = np.log10(f.MSE(z_test, network.layers[-1].a))

heatmap = sb.heatmap(mse_heatmap,
                     annot=mse_heatmap,
                     cmap='YlGnBu',
                     xticklabels=array_eta,
                     yticklabels=array_lambda,
                     cbar_kws={'label': 'MSE'})
heatmap.set_xlabel(r'$\eta$', size=12)
heatmap.set_ylabel(r'$\lambda$', size=12)
heatmap.invert_xaxis()
heatmap.set_title('RELU + Identity', size=16)
plt.show()
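
The grids array_eta and array_lambda are not shown above; log-spaced grids such as these are a typical assumption for this kind of heatmap search.

array_eta = np.logspace(-5, -1, 5)      # learning rates 1e-5 ... 1e-1
array_lambda = np.logspace(-6, 0, 7)    # regularization strengths 1e-6 ... 1e0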
Example 11
    def train(self, epochs, batch_size, x, y, activation, derivative,
              xvalidation, yvalidation, verbose=False):
        '''
        Inputs:
        epochs      -- maximum number of epochs
        batch_size  -- minibatch size
        x, y        -- the dataset used for training
        activation  -- list of activation functions
        derivative  -- list of their derivatives (could be refactored)
        xvalidation -- validation design matrix used for early stopping
        yvalidation -- validation targets used for early stopping
        verbose     -- if True, print the validation loss each epoch

        Outputs:
        None, but hopefully a well-trained network.
        '''
        tmp = int(len(y) / batch_size)
        Niter = min(200, tmp)  # cap the number of minibatches per epoch
        indexes = np.arange(len(y))

        self.cost_val = list()
        self.cost_train = list()

        for i in range(epochs):
            for j in range(Niter):

                datapoints = np.random.choice(indexes,
                                              size=batch_size,
                                              replace=False)
                batch_x = x[datapoints, :]
                batch_y = y[datapoints]

                self.feed(batch_x, activation)
                self.back(batch_x, batch_y, derivative)

            pred_val = self.feed_out(xvalidation, activation)
            pred_train = self.feed_out(batch_x, activation)  # training loss on the last minibatch only
            if self.mode == 'regression':
                self.cost_val.append(
                    fx.MSE(pred_val.ravel(), yvalidation.ravel()))
                self.cost_train.append(
                    fx.MSE(pred_train.ravel(), batch_y.ravel()))
            if self.mode == 'classification':
                self.cost_val.append(
                    lrf.cost_log_ols(pred_val.ravel(), yvalidation.T))
                self.cost_train.append(
                    lrf.cost_log_ols(pred_train.ravel(), batch_y.T))
            if i > self.early_stop_nochange:
                # compare the recent 5-epoch average validation loss with the
                # average over the longer no-change window
                long_window = np.arange(i - self.early_stop_nochange, i)
                short_window = np.arange(i - 5, i)

                recent = np.mean(np.array(self.cost_val)[short_window])
                baseline = np.mean(np.array(self.cost_val)[long_window])
                if recent - baseline > -self.early_stop_tol:
                    break

            if verbose:
                print('Epoch', i + 1, 'loss', self.cost_val[i])
Example 12
scaler.fit(terrain)  # compute the mean and std to be used for later scaling
terrain_scaled = scaler.transform(
    terrain)  # perform standardization by centering and scaling

# Fixing a set of points
terrain_scaled = terrain_scaled[:n, :n]

# Create mesh of image pixel
x = np.linspace(0, 1, terrain_scaled.shape[0])
y = np.linspace(0, 1, terrain_scaled.shape[1])
x, y = np.meshgrid(x, y)

X = f.design_matrix(x, y, best_degree)
z_tilde = X @ beta_best
z_tilde = z_tilde.reshape(x.shape)
print(f.MSE(terrain_scaled, z_tilde))

X_sklearn = f.design_matrix(x, y, best_degree_sklearn)
z_tilde_sklearn = X_sklearn @ beta_best_sklearn
z_tilde_sklearn = z_tilde_sklearn.reshape(x.shape)
print(f.MSE(terrain_scaled, z_tilde_sklearn))

plt.subplot(131)
plt.imshow(terrain_scaled, cmap='gist_rainbow')

plt.subplot(132)
plt.imshow(z_tilde, cmap='gist_rainbow')

plt.subplot(133)
plt.imshow(z_tilde_sklearn, cmap='gist_rainbow')
plt.show()