def main():
    """Ridge regression on one diabetes feature.

    Selects the regularization constant by k-fold cross validation over a
    fine grid, then fits on the full training set and plots the result.

    Fix: converted Python 2 print statements to print() calls so the block
    is valid Python 3 (consistent with the rest of the file).
    """
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    X = np.expand_dims(diabetes.data[:, 2], 1)

    # Split the data into training/testing sets
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])

    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(
        diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k  # average squared error over the k folds

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction with the best regularization factor found
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    """Ridge regression demo on synthetic data.

    Searches a grid of regularization constants with k-fold cross
    validation, then fits the best model and plots its predictions.
    """
    # Load the diabetes dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3,
                                    noise=10)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        folds = k_fold_cross_validation_sets(X_train, y_train, k=k)

        # Accumulate the squared error over every fold, then average
        total_error = 0
        for fold_X_train, fold_X_test, fold_y_train, fold_y_test in folds:
            model = RidgeRegression(delta=regularization_factor)
            model.fit(fold_X_train, fold_y_train)
            fold_pred = model.predict(fold_X_test)
            total_error += mean_squared_error(fold_y_test, fold_pred)
        mse = total_error / k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Polynomial regression (degree 2) on 2016 Linkoping temperature data.

    Fix: DataFrame/Series.as_matrix() was removed in pandas 1.0;
    replaced with the backward-compatible .values accessor.
    """
    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time
    y = temp
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=2, n_iterations=3000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Plot the results
    m = plt.scatter(X_test[:, 0], y_test, color='gray', s=10)
    p = plt.scatter(X_test[:, 0], y_pred, color='black', s=15)
    plt.suptitle(
        "Linear Regression of temperature data in Linkoping, Sweden 2016")
    plt.title("(%.2f MSE)" % mse)
    plt.xlabel('Fraction of year')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m, p), ("Measurements", "Prediction"), scatterpoints=1,
               loc='lower right')
    plt.show()
def main():
    """Linear regression on one feature of a CSV diabetes dataset.

    Fix: converted the Python 2 print statement to a print() call so the
    block is valid Python 3 (consistent with the rest of the file).
    """
    # Load the diabetes dataset
    diabetes = load_diabetes_dataset(dir_path + r"/../data/diabetes.csv")
    X = diabetes['X']
    y = diabetes['target']

    # Use only one feature
    X = X[:, np.newaxis, 2]

    # Split the data into training/testing sets
    x_train, x_test = X[:-20], X[-20:]

    # Split the targets into training/testing sets
    y_train, y_test = y[:-20], y[-20:]

    clf = LinearRegression()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Print the mean squared error
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(x_test[:, 0], y_test, color='black')
    plt.plot(x_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    """Ridge regression on one diabetes feature with CV-selected delta.

    Fix: converted Python 2 print statements to print() calls so the block
    is valid Python 3 (consistent with the rest of the file).
    """
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    X = np.expand_dims(diabetes.data[:, 2], 1)

    # Split the data into training/testing sets
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])

    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(
        diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k  # average squared error over the k folds

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction with the best regularization factor found
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    """Ridge regression on synthetic data.

    Grid-searches the regularization constant, scoring each candidate by
    k-fold cross validation, then fits and plots the best model.
    """
    # Load the diabetes dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3,
                                    noise=10)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        folds = k_fold_cross_validation_sets(X_train, y_train, k=k)

        # Collect the per-fold squared errors, then average them
        fold_errors = []
        for cv_X_train, cv_X_test, cv_y_train, cv_y_test in folds:
            estimator = RidgeRegression(delta=regularization_factor)
            estimator.fit(cv_X_train, cv_y_train)
            fold_errors.append(
                mean_squared_error(cv_y_test, estimator.predict(cv_X_test)))
        mse = sum(fold_errors) / k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Gradient boosting demo: iris classification, then regression on
    synthetic one-dimensional data."""
    print("-- Gradient Boosting Classification --")

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Visualize the classification result in two dimensions
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Gradient Boosting",
                   accuracy=accuracy, legend_labels=iris.target_names)

    print("-- Gradient Boosting Regression --")
    X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(
        standardize(X), y, test_size=0.5)

    regressor = GradientBoostingRegressor(debug=True)
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Polynomial ridge regression on synthetic data.

    Shows the training-error curve, reports test MSE, and plots the
    fitted prediction line against train/test points.
    """
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = PolynomialRidgeRegression(reg_factor=0.1, degree=3,
                                      n_iterations=100, learning_rate=0.001)
    model.fit(X_train, y_train)

    # Training error plot
    errors = model.training_errors
    training, = plt.plot(range(len(errors)), errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    line_pred = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, line_pred, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("PolynomialRegression Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Regression tree on synthetic one-dimensional data.

    Fix: converted the Python 2 print statement to a print() call so the
    block is valid Python 3 (consistent with the rest of the file).
    """
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(
        standardize(X), y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Print the mean squared error
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
def main():
    """Polynomial regression (degree 6) on 2016 Linkoping temperature data,
    with a dense prediction line for plotting.

    Fix: DataFrame/Series.as_matrix() was removed in pandas 1.0;
    replaced with the backward-compatible .values accessor.
    """
    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time  # fraction of the year [0, 1]
    y = temp
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=6, n_iterations=100000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Generate data for prediction line
    X_pred_ = np.arange(0, 1, 0.001).reshape((1000, 1))
    y_pred_ = clf.predict(X=X_pred_)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (x scaled from year-fraction to day number)
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p = plt.plot(366 * X_pred_, y_pred_, color="black", linewidth=2,
                 label="Prediction")
    plt.suptitle("Polynomial Regression")
    plt.title("MSE: %.2f" % mse)
    plt.xlabel('Days')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Run gradient boosting twice: classify iris, then regress on a
    synthetic one-feature dataset, printing scores and plotting results."""
    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    features, labels = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.4)

    classifier = GradientBoostingClassifier(debug=True)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    # Two-dimensional projection of the classification result
    PCA().plot_in_2d(X_test, predictions, title="Gradient Boosting",
                     accuracy=accuracy, legend_labels=data.target_names)

    print("-- Gradient Boosting Regression --")
    X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(
        standardize(X), y, test_size=0.5)

    regressor = GradientBoostingRegressor(debug=True)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], predictions, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Decision tree demo: iris classification followed by regression on
    synthetic one-dimensional data."""
    print("-- Classification Tree --")

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.4, seed=2)

    tree = ClassificationTree()
    tree.fit(X_train, y_train)
    predictions = tree.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, predictions))

    # Visualize the classification in two dimensions
    PCA().plot_in_2d(X_test, predictions)

    print("-- Regression Tree --")
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(
        standardize(X), y, test_size=0.3)

    regressor = RegressionTree()
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    print("Mean Squared Error:", mean_squared_error(y_test, predictions))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], predictions, color='green')
    plt.show()
def main():
    """Linear regression on synthetic data: report the test MSE and plot
    the fitted line over the test points."""
    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    model = LinearRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], predictions, color='blue', linewidth=3)
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    """Linear regression on synthetic data; prints the test MSE and plots
    predictions against the true targets."""
    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    model = LinearRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Print the mean squared error
    print("Mean Squared Error:", mean_squared_error(y_test, predictions))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], predictions, color='blue', linewidth=3)
    plt.show()
def main():
    """Linear regression on synthetic data with shape diagnostics printed
    along the way, then a plot of the fitted line."""
    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    print(X_train.shape, y_train.shape)

    # gradient_descent=False presumably selects a closed-form solution —
    # confirm against the LinearRegression implementation.
    model = LinearRegression(gradient_descent=False)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(predictions.shape)
    print(X_test.shape)

    mse = mean_squared_error(y_test, predictions)
    print("Mean squared error", mse)

    plt.scatter(X_test, y_test, color='black')
    plt.plot(X_test, predictions, color='red', lw=4)
    plt.title("Linear Regression")
    plt.show()
def main():
    """Regression tree on 2016 Linkoping temperature data.

    Fix: DataFrame/Series.as_matrix() was removed in pandas 1.0;
    replaced with the backward-compatible .values accessor. Also removed
    a duplicated comment line.
    """
    print("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('../datasets/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results (x scaled from year-fraction to day number)
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def main():
    """Locally weighted linear regression: fit around a single query point
    (the first test sample) and report the error of its prediction."""
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    # The model is refit per query point; here only X_test[0] is queried.
    model = LocallyWeightedLinearRegression()
    model.fit(X_test[0], X_train, y_train)

    y_pred = model.predict(X_test[0], X_train, y_train)

    # NOTE(review): this compares one prediction against the whole y_test —
    # preserved as-is from the original.
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))
    print("predicted number is: %u" % (y_pred))
def main():
    """Bayesian polynomial regression on 2016 Linkoping temperature data,
    plotting the prediction line with a credible interval band.

    Fix: DataFrame/Series.as_matrix() was removed in pandas 1.0;
    replaced with the backward-compatible .values accessor. A redundant
    duplicate plt.legend() call was dropped.
    """
    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time  # fraction of the year [0, 1]
    y = temp
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    # Prior parameters
    # - Weights are assumed distr. according to a Normal distribution
    # - The variance of the weights are assumed distributed according to
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    # Normal
    mu0 = np.array([0] * n_features)
    omega0 = np.diag([.0001] * n_features)
    # Scaled inverse chi-squared
    nu0 = 1
    sigma_sq0 = 100

    # The credible interval
    cred_int = 10

    clf = BayesianRegression(n_draws=2000, poly_degree=4, mu0=mu0,
                             omega0=omega0, nu0=nu0, sigma_sq0=sigma_sq0,
                             cred_int=cred_int)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Get prediction line with equal-tailed interval bounds
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (x scaled from year-fraction to day number)
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2,
                  label="Prediction")
    p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2,
                  label="{0}% Credible Interval".format(cred_int))
    p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse)
    plt.xlabel('Days')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')
    plt.show()