def main():
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    X = np.expand_dims(diabetes.data[:, 2], 1)

    # Split the data into training/testing sets
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])

    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(
        diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print "Finding regularization constant using cross validation:"
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(X_train,
                                                             y_train,
                                                             k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print "\tMean Squared Error: %s (regularization: %s)" % (
            mse, regularization_factor)

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print "Mean squared error: %s (given by reg. factor: %s)" % (
        lowest_error, best_reg_factor)
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
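The helper k_fold_cross_validation_sets is defined elsewhere in the project. A minimal sketch of what it plausibly does, assuming it simply partitions the training data into k contiguous folds and returns one split per held-out fold (the tuple order matches the loop above):

import numpy as np

def k_fold_cross_validation_sets(X, y, k):
    """Partition (X, y) into k folds; return one
    (X_train, X_test, y_train, y_test) tuple per held-out fold."""
    n_samples = len(y)
    fold_size = n_samples // k
    sets = []
    for i in range(k):
        start, end = i * fold_size, (i + 1) * fold_size
        # Fold i is held out for validation
        X_test, y_test = X[start:end], y[start:end]
        # The remaining samples form the training set
        X_train = np.concatenate([X[:start], X[end:]], axis=0)
        y_train = np.concatenate([y[:start], y[end:]], axis=0)
        sets.append((X_train, X_test, y_train, y_test))
    return sets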
def main():
    # Generate a synthetic regression dataset
    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=3,
                                    noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        cross_validation_sets = k_fold_cross_validation_sets(X_train,
                                                             y_train,
                                                             k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" %
              (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" %
          (lowest_error, best_reg_factor))
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
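RidgeRegression itself is also external. A sketch of a closed-form version, assuming delta is the L2 penalty weight added to the normal equations (the actual class may instead train by gradient descent):

import numpy as np

class RidgeRegression:
    """Least squares with an L2 penalty:
    w = (X^T X + delta * I)^-1 X^T y, with a bias column prepended."""
    def __init__(self, delta=0.1):
        self.delta = delta

    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # bias column
        A = X.T.dot(X) + self.delta * np.identity(X.shape[1])
        self.w = np.linalg.solve(A, X.T.dot(y))

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)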
def main():

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].to_numpy()).T
    temp = np.atleast_2d(data["temp"].to_numpy()).T

    X = time
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=2, n_iterations=3000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Plot the results
    m = plt.scatter(X_test[:, 0], y_test, color='gray', s=10)
    p = plt.scatter(X_test[:, 0], y_pred, color='black', s=15)
    plt.suptitle(
        "Polynomial regression of temperature data in Linkoping, Sweden 2016")
    plt.title("(%.2f MSE)" % mse)
    plt.xlabel('Fraction of year')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m, p), ("Measurements", "Prediction"),
               scatterpoints=1,
               loc='lower right')

    plt.show()
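PolynomialRegression(degree=2, ...) suggests plain least squares on polynomial features. A sketch of the feature transform it presumably applies to a single input column (a hypothetical helper, not the project's actual code):

import numpy as np

def polynomial_features(X, degree):
    """Map a single-column X to [X, X^2, ..., X^degree]."""
    return np.concatenate([X ** d for d in range(1, degree + 1)], axis=1)

# Example: polynomial_features(np.array([[2.0]]), 3) -> [[2., 4., 8.]]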
def main():
    # Load the diabetes dataset
    diabetes = load_diabetes_dataset(dir_path + r"/../data/diabetes.csv")
    X = diabetes['X']
    y = diabetes['target']
    # Use only one feature
    X = X[:, np.newaxis, 2]

    # Split the data into training/testing sets
    x_train, x_test = X[:-20], X[-20:]

    # Split the targets into training/testing sets
    y_train, y_test = y[:-20], y[-20:]

    clf = LinearRegression()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Print the mean squared error
    print "Mean Squared Error:", mean_squared_error(y_test, y_pred)

    # Plot the results
    plt.scatter(x_test[:, 0], y_test, color='black')
    plt.plot(x_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    # Load the diabetes dataset
    diabetes = datasets.load_diabetes()

    # Use only one feature
    X = np.expand_dims(diabetes.data[:, 2], 1)

    # Split the data into training/testing sets
    X_train, X_test = np.array(X[:-20]), np.array(X[-20:])

    # Split the targets into training/testing sets
    y_train, y_test = np.array(diabetes.target[:-20]), np.array(diabetes.target[-20:])

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.5, 0.0001):
        cross_validation_sets = k_fold_cross_validation_sets(X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Test mean squared error: %s (chosen reg. factor: %s)" % (mse, best_reg_factor))
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    # Generate a synthetic regression dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print ("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor))
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
def main():

    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Gradient Boosting",
                   accuracy=accuracy,
                   legend_labels=data.target_names)

    print("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=150,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
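GradientBoostingRegressor and the standardize helper come from the surrounding project. A compact sketch of both ideas, assuming squared-error boosting with single-feature regression stumps as base learners (n_estimators, learning_rate, and the unused debug flag are hypothetical):

import numpy as np

def standardize(X):
    """Zero-mean, unit-variance scaling per feature (assumed behaviour)."""
    return (X - X.mean(axis=0)) / X.std(axis=0)

class Stump:
    """Single-feature, one-split regression tree."""
    def fit(self, X, y):
        # Fallback: predict the mean if no split improves on it
        self.t, self.left_val, self.right_val = np.inf, y.mean(), y.mean()
        best_sse = np.inf
        for t in np.unique(X[:, 0]):
            left, right = y[X[:, 0] <= t], y[X[:, 0] > t]
            if len(left) == 0 or len(right) == 0:
                continue
            sse = ((left - left.mean()) ** 2).sum() + \
                  ((right - right.mean()) ** 2).sum()
            if sse < best_sse:
                best_sse, self.t = sse, t
                self.left_val, self.right_val = left.mean(), right.mean()
        return self

    def predict(self, X):
        return np.where(X[:, 0] <= self.t, self.left_val, self.right_val)

class GradientBoostingRegressor:
    """For squared loss the negative gradient is the residual,
    so each stump is fit to y minus the current prediction."""
    def __init__(self, n_estimators=50, learning_rate=0.1, debug=False):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.debug = debug  # accepted for signature parity; unused here

    def fit(self, X, y):
        self.base = y.mean()
        y_pred = np.full(len(y), self.base)
        self.trees = []
        for _ in range(self.n_estimators):
            tree = Stump().fit(X, y - y_pred)
            y_pred = y_pred + self.learning_rate * tree.predict(X)
            self.trees.append(tree)

    def predict(self, X):
        y_pred = np.full(len(X), self.base)
        for tree in self.trees:
            y_pred = y_pred + self.learning_rate * tree.predict(X)
        return y_pred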
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = PolynomialRidgeRegression(reg_factor=0.1,
                                      degree=3,
                                      n_iterations=100,
                                      learning_rate=0.001)

    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n),
                         model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (synthetic data, so the axes are unitless)
    m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    # Sort by x so the prediction line is drawn left to right
    order = np.argsort(X[:, 0])
    plt.plot(X[order],
             y_pred_line[order],
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
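PolynomialRidgeRegression records training_errors per iteration, which suggests gradient-descent training on polynomial features with an L2 penalty. A sketch under those assumptions:

import numpy as np

class PolynomialRidgeRegression:
    def __init__(self, reg_factor, degree, n_iterations, learning_rate):
        self.reg_factor = reg_factor
        self.degree = degree
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate

    def _transform(self, X):
        # Bias column plus polynomial features of the single input column
        cols = [np.ones((len(X), 1))] + [X ** d for d in range(1, self.degree + 1)]
        return np.concatenate(cols, axis=1)

    def fit(self, X, y):
        X = self._transform(X)
        self.w = np.zeros(X.shape[1])
        self.training_errors = []
        for _ in range(self.n_iterations):
            error = X.dot(self.w) - y
            self.training_errors.append(np.mean(error ** 2))
            # Gradient of the MSE plus the L2 penalty term
            grad = X.T.dot(error) / len(y) + self.reg_factor * self.w
            self.w -= self.learning_rate * grad

    def predict(self, X):
        return self._transform(X).dot(self.w)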
def main():

    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Print the mean squared error
    print "Mean Squared Error:", mean_squared_error(y_test, y_pred)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
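RegressionTree is likewise external. A compact recursive sketch, assuming splits minimize the summed squared error of the child means (min_samples_split and max_depth are hypothetical defaults):

import numpy as np

class RegressionTree:
    def __init__(self, min_samples_split=10, max_depth=6):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth

    def fit(self, X, y):
        self.root = self._build(X, y, depth=0)
        return self

    def _build(self, X, y, depth):
        # Stop and emit a leaf predicting the mean target
        if depth >= self.max_depth or len(y) < self.min_samples_split:
            return float(y.mean())
        best, best_sse = None, np.inf
        for j in range(X.shape[1]):
            for t in np.unique(X[:, j]):
                mask = X[:, j] <= t
                if mask.all() or not mask.any():
                    continue
                l, r = y[mask], y[~mask]
                sse = ((l - l.mean()) ** 2).sum() + ((r - r.mean()) ** 2).sum()
                if sse < best_sse:
                    best_sse, best = sse, (j, t, mask)
        if best is None:
            return float(y.mean())
        j, t, mask = best
        return (j, t,
                self._build(X[mask], y[mask], depth + 1),
                self._build(X[~mask], y[~mask], depth + 1))

    def _walk(self, node, x):
        if not isinstance(node, tuple):
            return node
        j, t, left, right = node
        return self._walk(left if x[j] <= t else right, x)

    def predict(self, X):
        return np.array([self._walk(self.root, x) for x in X])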
def main():

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].to_numpy()).T
    temp = np.atleast_2d(data["temp"].to_numpy()).T

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=6, n_iterations=100000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Generate data for prediction line
    X_pred_ = np.arange(0, 1, 0.001).reshape((1000, 1))
    y_pred_ = clf.predict(X=X_pred_)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p = plt.plot(366 * X_pred_,
                 y_pred_,
                 color="black",
                 linewidth=2,
                 label="Prediction")
    plt.suptitle("Polynomial Regression")
    plt.title("MSE: %.2f" % mse)
    plt.xlabel('Days')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')

    plt.show()
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, 
        title="Gradient Boosting", 
        accuracy=accuracy, 
        legend_labels=data.target_names)

    print ("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

    print("-- Regression Tree --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
def main():

    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = LinearRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()
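For reference, the mean_squared_error metric used throughout is presumably the usual average of squared residuals:

import numpy as np

def mean_squared_error(y_true, y_pred):
    """Average squared difference between targets and predictions."""
    return np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)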
def main():

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=200,
                                    bias=100,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = LinearRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Print the mean squared error
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.show()
def main():
    X, y = datasets.make_regression(n_features=1,
                                    n_samples=200,
                                    bias=100,
                                    noise=5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    clf = LinearRegression(gradient_descent=False)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    print("Mean squared error", mse)

    plt.scatter(X_test, y_test, color='black')
    plt.plot(X_test, y_pred, color='red', lw=4)
    plt.title("Linear Regression")
    plt.show()
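LinearRegression(gradient_descent=False) implies a closed-form least-squares fit rather than iterative training. A minimal sketch using the Moore-Penrose pseudoinverse of the design matrix:

import numpy as np

class LinearRegression:
    """Ordinary least squares via the pseudoinverse: w = pinv(X) y."""
    def __init__(self, gradient_descent=False):
        self.gradient_descent = gradient_descent  # closed form when False

    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1)  # bias column
        # The Moore-Penrose pseudoinverse also handles rank-deficient X
        self.w = np.linalg.pinv(X).dot(y)

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)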
def main():

    print("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('../datasets/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].to_numpy()).T
    temp = np.atleast_2d(data["temp"].to_numpy()).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results (X was standardized, so the time axis is unitless)
    m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Standardized time')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = LocallyWeightedLinearRegression()

    model.fit(X_test[0], X_train, y_train)


    y_pred = model.predict(X_test[0], X_train, y_train)
    # Compare the single prediction against its corresponding target
    se = float((y_test[0] - y_pred) ** 2)
    print("Squared error: %s" % se)
    print("Predicted value: %s" % y_pred)
def main():

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].to_numpy()).T
    temp = np.atleast_2d(data["temp"].to_numpy()).T

    X = time # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    # Prior parameters
    # - Weights are assumed to be distributed according to a Normal distribution.
    # - The variance of the weights is assumed to be distributed according to
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    # Normal
    mu0 = np.array([0] * n_features)
    omega0 = np.diag([.0001] * n_features)
    # Scaled inverse chi-squared
    nu0 = 1
    sigma_sq0 = 100

    # The credible interval (in percent)
    cred_int = 10

    clf = BayesianRegression(n_draws=2000,
                             poly_degree=4,
                             mu0=mu0,
                             omega0=omega0,
                             nu0=nu0,
                             sigma_sq0=sigma_sq0,
                             cred_int=cred_int)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Get prediction line
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print ("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
    p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int))
    p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse)
    plt.xlabel('Days')
    plt.ylabel('Temperature in Celsius')
    plt.legend(loc='lower right')

    plt.show()
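The conjugate updates inside BayesianRegression are not shown. A sketch of the posterior sampling step under the stated prior, assuming 1-d targets and an already-expanded polynomial design matrix, following the standard normal / scaled-inverse-chi-squared conjugate formulas:

import numpy as np

def posterior_draws(X, y, mu0, omega0, nu0, sigma_sq0, n_draws=2000):
    """Sample (w, sigma^2) from the conjugate posterior of a
    normal / scaled-inverse-chi-squared Bayesian linear model."""
    n = len(y)
    XtX = X.T.dot(X)
    beta_hat = np.linalg.pinv(XtX).dot(X.T).dot(y)
    omega_n = XtX + omega0
    mu_n = np.linalg.solve(omega_n, XtX.dot(beta_hat) + omega0.dot(mu0))
    nu_n = nu0 + n
    sigma_sq_n = (nu0 * sigma_sq0 + y.dot(y) + mu0.dot(omega0).dot(mu0)
                  - mu_n.dot(omega_n).dot(mu_n)) / nu_n
    omega_n_inv = np.linalg.inv(omega_n)
    draws = []
    for _ in range(n_draws):
        # Scaled-inverse-chi-squared draw via a chi-squared variate
        sigma_sq = nu_n * sigma_sq_n / np.random.chisquare(nu_n)
        w = np.random.multivariate_normal(mu_n, sigma_sq * omega_n_inv)
        draws.append((w, sigma_sq))
    return draws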