def main():
    """Fit ridge regression to one feature of the diabetes dataset.

    Selects the regularization constant by 10-fold cross validation over a
    grid of candidate values, refits on the full training set with the best
    constant, reports errors, and plots the test-set predictions.
    """
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()
    # Use only one feature (column 2)
    X = np.expand_dims(diabetes.data[:, 2], 1)
    # Split the data into training/testing sets (last 20 samples held out)
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])
    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(
        diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        # Average the validation error over the k folds
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction on the held-out test set
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Plot the results; show the test-set MSE in the title so the computed
    # value is actually reported (it was previously unused)
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Demonstrate ridge regression on a synthetic 1-D regression problem.

    The regularization constant is chosen by 10-fold cross validation over a
    grid of candidates; the best one is used for a final fit whose test-set
    error is reported and plotted.
    """
    # Generate a synthetic single-feature regression dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Grid-search the regularization constant via k-fold cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for reg_factor in np.arange(0, 0.3, 0.001):
        folds = k_fold_cross_validation_sets(X_train, y_train, k=k)

        # Collect the validation error of every fold, then average
        fold_errors = []
        for fold_X_train, fold_X_test, fold_y_train, fold_y_test in folds:
            model = RidgeRegression(delta=reg_factor)
            model.fit(fold_X_train, fold_y_train)
            fold_pred = model.predict(fold_X_test)
            fold_errors.append(mean_squared_error(fold_y_test, fold_pred))
        mse = sum(fold_errors) / k

        # Report this candidate's cross-validation error
        print("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))

        # Keep the candidate with the lowest cross-validation error
        if mse < lowest_error:
            best_reg_factor = reg_factor
            lowest_error = mse

    # Refit on the whole training set using the chosen constant
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor))

    # Visualize the held-out data against the fitted line
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Fit ridge regression to one feature of the diabetes dataset.

    Chooses the regularization constant by 10-fold cross validation over a
    grid of candidates, refits with the best constant on the full training
    set, reports errors, and plots the test-set predictions.
    """
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()
    # Use only one feature (column 2)
    X = np.expand_dims(diabetes.data[:, 2], 1)
    # Split the data into training/testing sets (last 20 samples held out)
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])
    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(X_train, y_train, k=k)
        # Average the validation error over the k folds
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction on the held-out test set
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor))

    # Plot the results; show the test-set MSE in the title so the computed
    # value is actually reported (it was previously unused)
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Demonstrate ridge regression on a synthetic 1-D regression problem.

    Runs a grid search over regularization constants, scoring each candidate
    with 10-fold cross validation, then fits the best model and plots its
    test-set predictions.
    """
    # Generate a synthetic single-feature regression dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Track the best candidate seen so far during the grid search
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for candidate in np.arange(0, 0.3, 0.001):
        folds = k_fold_cross_validation_sets(X_train, y_train, k=k)

        # Accumulate per-fold validation error, then average over k folds
        total_error = 0
        for cv_X_train, cv_X_test, cv_y_train, cv_y_test in folds:
            fold_model = RidgeRegression(delta=candidate)
            fold_model.fit(cv_X_train, cv_y_train)
            total_error += mean_squared_error(
                cv_y_test, fold_model.predict(cv_X_test))
        mse = total_error / k

        # Report this candidate's cross-validation error
        print ("\tMean Squared Error: %s (regularization: %s)" % (
            mse, candidate))

        # Remember the candidate with the lowest cross-validation error
        if mse < lowest_error:
            best_reg_factor = candidate
            lowest_error = mse

    # Refit on the whole training set using the chosen constant
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Visualize the held-out data against the fitted line
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()