Example #1
Error_train_rlr = np.empty((K, 1))   # used below; preallocated like the others
Error_train_nofeatures = np.empty((K, 1))
Error_test_nofeatures = np.empty((K, 1))
w_rlr = np.empty((M, K))      # plain ndarray; np.matrix is deprecated
w_noreg = np.empty((M, K))

k = 0
for train_index, test_index in CV.split(X, y):

    # extract training and test set for current CV fold
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]
    internal_cross_validation = 10

    opt_val_err, opt_lambda, mean_w_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
        X_train, y_train, lambdas, internal_cross_validation)

    Xty = X_train.T @ y_train
    XtX = X_train.T @ X_train

    # Compute mean squared error without using the input data at all
    Error_train_nofeatures[k] = np.square(
        y_train - y_train.mean()).sum() / y_train.shape[0]
    Error_test_nofeatures[k] = np.square(y_test -
                                         y_test.mean()).sum() / y_test.shape[0]
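    # Note: these "no features" baselines equal the (biased) per-split variance
    # of y (np.var(y_train), np.var(y_test)); predicting the mean is the best
    # constant model under squared error.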

    # Estimate weights for the optimal value of lambda, on entire training set
    w_rlr[:, k] = np.linalg.lstsq(XtX + opt_lambda * np.eye(M), Xty, rcond=None)[0]
    # Compute mean squared error with regularization with optimal lambda
    Error_train_rlr[k] = np.square(
        y_train - X_train @ w_rlr[:, k]).sum() / y_train.shape[0]
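All of these examples call `rlr_validate`, which is never shown. For readers without access to it, below is a minimal sketch consistent with how it is called and unpacked above: an inner `cvf`-fold cross-validation over the candidate lambdas, returning the lambda with the lowest mean validation error. This is an assumption-based reconstruction, not the original implementation.

import numpy as np
from sklearn import model_selection

def rlr_validate_sketch(X, y, lambdas, cvf=10):
    # Inner cvf-fold CV: fit a closed-form ridge solution for every candidate
    # lambda on each fold and track training/validation squared errors.
    CV = model_selection.KFold(cvf, shuffle=True)
    M = X.shape[1]
    w = np.empty((M, cvf, len(lambdas)))
    train_error = np.empty((cvf, len(lambdas)))
    test_error = np.empty((cvf, len(lambdas)))
    for f, (train_index, test_index) in enumerate(CV.split(X, y)):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        XtX = X_train.T @ X_train
        Xty = X_train.T @ y_train
        for l, lam in enumerate(lambdas):
            w[:, f, l] = np.linalg.solve(XtX + lam * np.eye(M), Xty)
            train_error[f, l] = np.square(y_train - X_train @ w[:, f, l]).mean()
            test_error[f, l] = np.square(y_test - X_test @ w[:, f, l]).mean()
    mean_val_err = test_error.mean(axis=0)
    opt_lambda = lambdas[np.argmin(mean_val_err)]
    return (mean_val_err.min(), opt_lambda, w.mean(axis=1),
            train_error.mean(axis=0), mean_val_err)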
Example #2
print("mean: ", std.mean(y))
print("standard deviation: ", std.stdev(y))

y_label = "mass"
# drop column 1 of the continuous attributes (the target, judging by y_label)
# before stacking them with the one-hot encoded columns
contCols = np.delete(contCols, 1, axis=1)
X = np.column_stack((contCols, X_enc))

# offset attribute: prepend a column of ones so the first weight acts as a bias term
Xoff = np.concatenate((np.ones((X.shape[0], 1)), X), 1)
N, M = Xoff.shape

# candidate regularization strengths: 50 log-spaced values from 10^0 to 10^5
lambdas = np.logspace(0, 5)
K = 10

opt_val_err, opt_lambda, mean_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
    X, y, lambdas, cvf=K)

plt.title('Model error vs regularization parameter')
plt.loglog(
    lambdas,
    train_err_vs_lambda.T,
    lambdas,
    test_err_vs_lambda.T,
)
plt.semilogx(opt_lambda, opt_val_err, markersize=10, marker='D')
plt.xlabel('Regularization parameter')
plt.ylabel('Model error (MSE)')
plt.legend(['Training error', 'Test error', 'Test minimum'])
plt.grid()
# plt.show()
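Assuming `test_err_vs_lambda` holds the mean validation error for each candidate (as in the sketch after Example #1), the plotted optimum is simply the argmin of that curve:

import numpy as np
# the diamond marker sits at the minimum of the validation curve
assert np.isclose(opt_val_err, np.min(test_err_vs_lambda))
assert np.isclose(opt_lambda, lambdas[np.argmin(test_err_vs_lambda)])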
Example #3
k = 0
for train_index, test_index in CV1.split(X, y):
    print("k = ", k)

    # extract training and test set for current CV fold
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]

    X_ANN_train = X_ANN[train_index]
    y_ANN_train = y_ANN[train_index]
    X_ANN_test = X_ANN[test_index]
    y_ANN_test = y_ANN[test_index]

    opt_val_err, opt_lambda, mean_w_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
        X_train, y_train, lambdas, K2)

    Opt_lambda_arr[k] = opt_lambda

    # Standardize outer fold based on training set, and save the mean and standard
    # deviations since they're part of the model (they would be needed for
    # making new predictions) - for brevity we won't always store these in the scripts
    mu[k, :] = np.mean(X_train[:, 1:], 0)
    sigma[k, :] = np.std(X_train[:, 1:], 0)

    X_train[:, 1:] = (X_train[:, 1:] - mu[k, :]) / sigma[k, :]
    X_test[:, 1:] = (X_test[:, 1:] - mu[k, :]) / sigma[k, :]

    Xty = X_train.T @ y_train
    XtX = X_train.T @ X_train
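As the comment above notes, `mu[k]` and `sigma[k]` are part of the fold-k model. A minimal sketch of reusing them at prediction time; `x_new` is hypothetical, and the weights `w_rlr[:, k]` are assumed to be estimated as in Example #1:

x_new = np.ones((1, M))                                  # hypothetical new observation, bias first
x_new[:, 1:] = (x_new[:, 1:] - mu[k, :]) / sigma[k, :]   # standardize with the training statistics
y_pred = x_new @ w_rlr[:, k]                             # predict with the fold-k ridge weights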
Example #4
Error_train_nofeatures = np.empty((K, 1))
Error_test_nofeatures = np.empty((K, 1))
w_rlr = np.empty((M, K))      # plain ndarray; np.matrix is deprecated
w_noreg = np.empty((M, K))

k = 0
for train_index, test_index in CV.split(X, y):   # modern sklearn KFold API
    
    # extract training and test set for current CV fold
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]
    internal_cross_validation = 10    
    
    opt_val_err, opt_lambda, mean_w_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
        X_train, y_train, lambdas, internal_cross_validation)
    print("=======================================")
    print(opt_lambda)
    print(train_err_vs_lambda)
    print(test_err_vs_lambda)
    print("=======================================")


    Xty = X_train.T @ y_train   # use @ for matrix products; * is elementwise on ndarrays
    XtX = X_train.T @ X_train
    
    # Compute mean squared error without using the input data at all
    Error_train_nofeatures[k] = np.square(y_train-y_train.mean()).sum()/y_train.shape[0]
    Error_test_nofeatures[k] = np.square(y_test-y_test.mean()).sum()/y_test.shape[0]

    # Estimate weights for the optimal value of lambda, on entire training set
    # (completing the truncated snippet the same way as Example #1):
    w_rlr[:, k] = np.linalg.lstsq(XtX + opt_lambda * np.eye(M), Xty, rcond=None)[0]
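A common refinement of this closed-form solve, not shown in these snippets, is to leave the offset weight unregularized by zeroing the first diagonal entry of the penalty matrix:

lambdaI = opt_lambda * np.eye(M)
lambdaI[0, 0] = 0                                  # do not shrink the offset/bias weight
w_rlr[:, k] = np.linalg.solve(XtX + lambdaI, Xty)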
Example #5
sigma = np.empty((K, M-1))
w_noreg = np.empty((M, K))
all_lambdas = np.empty(K)

Z_lr = []
Z_base = []
zann = []


yhat_lr = []
yhat_lr_base = []
yhat_ANN = []


### Part A, using Algorithm 5
opt_val_err, opt_lambda, mean_w_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
    Xreg, Ysbp, lambdas, 10)
figure(1, figsize=(17,8))
subplot(1,2,1)
semilogx(lambdas,mean_w_vs_lambda.T[:,1:],'.-') # Don't plot the bias term
xlabel('Regularization factor')
ylabel('Mean Coefficient Values')
grid()
# You can choose to display the legend, but it's omitted for a cleaner 
# plot, since there are many attributes
#legend(attributeNames[1:], loc='best')
subplot(1,2,2)
title('Optimal lambda: {0}'.format(opt_lambda))
loglog(lambdas, train_err_vs_lambda.T, 'b.-', lambdas, test_err_vs_lambda.T, 'r.-')
xlabel('Regularization factor')
ylabel('Squared error (crossvalidation)')
legend(['Train error', 'Test error'])
Example #6
for (k, (train_index, test_index)) in enumerate(CV.split(X,y)):
    print('\nCrossvalidation fold: {0}/{1}'.format(k+1,K))
    
    # extract training and test set for current CV fold
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]
    internal_cross_validation = 10

    # Model 1: regularized linear regression (y_train is flattened from a
    # column matrix to a 1-D array before passing it to rlr_validate)
    opt_val_err, opt_lambda, mean_w_vs_lambda, train_err_vs_lambda, test_err_vs_lambda = rlr_validate(
        X_train, np.array(y_train.T)[0], lambdas, internal_cross_validation)
    
    # Model 2: unregularized linear regression
    m = lm.LinearRegression().fit(X_train, y_train)
    MSE_train[k,1] = np.square(y_train-m.predict(X_train)).sum()/y_train.shape[0]
    MSE_test[k,1] = np.square(y_test-m.predict(X_test)).sum()/y_test.shape[0]
    
    
    # Model 3: Baseline - Compute mean squared error without using the input data at all
    MSE_train[k,2] = np.square(y_train-y_train.mean()).sum(axis=0)/y_train.shape[0]
    MSE_test[k,2] = np.square(y_test-y_test.mean()).sum(axis=0)/y_test.shape[0]
    
    print('\nOptimal Lambda: {0}'.format(opt_lambda))
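The loop above is truncated; after it finishes, the per-fold errors are typically averaged into one generalization-error estimate per model. A minimal sketch, assuming `MSE_test` is a (K, n_models) ndarray with one row per outer fold:

Est_gen_err = MSE_test.mean(axis=0)   # average test MSE over the K outer folds
for i, err in enumerate(Est_gen_err):
    print('Model {0}: estimated generalization error = {1:.4f}'.format(i, err))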