Example No. 1
def main():

    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Gradient Boosting",
                   accuracy=accuracy,
                   legend_labels=data.target_names)

    print("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=150,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
Example No. 2
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=4000, learning_rate=0.01, plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend=True)
Example No. 3
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
Example No. 4
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Optimization method for finding weights that minimizes loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
Example No. 5
def main():

    print("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = XGBoost(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="XGBoost",
                   accuracy=accuracy,
                   legend_labels=data.target_names)
Example No. 6
def main():

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = time
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=2, n_iterations=3000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Plot the results
    m = plt.scatter(X_test[:, 0], y_test, color='gray', s=10)
    p = plt.scatter(X_test[:, 0], y_pred, color='black', s=15)
    plt.suptitle(
        "Linear Regression of temperature data in Linkoping, Sweden 2016")
    plt.title("(%.2f MSE)" % mse)
    plt.xlabel('Fraction of year')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m, p), ("Measurements", "Prediction"),
               scatterpoints=1,
               loc='lower right')

    plt.show()
Example No. 7
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
                               n_iterations=5000,
                               learning_rate=0.01,
                               early_stopping=True,
                               plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Multilayer Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
Example No. 8
def main():
    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=3,
                                    noise=10)
    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)
    clf = RidgeRegression()
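The snippet ends right after constructing the model. Assuming RidgeRegression follows the same fit/predict interface used by the other estimators in these examples (and reusing the names defined above), a minimal continuation might look like:

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)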
Example No. 9
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    # Change class labels from strings to numbers
    # df = df.replace(to_replace="setosa", value="-1")
    # df = df.replace(to_replace="virginica", value="1")
    # df = df.replace(to_replace="versicolor", value="2")

    # Only select data for two classes
    #X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix()
    #y = df.loc[df['species'] != "2"]["species"].as_matrix()

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Adaboost classification
    clf = Adaboost(n_clf=8)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 10
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Logistic Regression",
                   accuracy=accuracy)
Example No. 11
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

    print("-- Regression Tree --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
Example No. 12
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, 
        title="Gradient Boosting", 
        accuracy=accuracy, 
        legend_labels=data.target_names)

    print ("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
Example No. 13
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print(accuracy_score(y_pred, y_test))
Example No. 14
def main():
    iris = load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 15
def main():
    iris = load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 16
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.3)
    knn = KNN(3)
    y_pred = knn.predict(X_test, X_train, y_train)
    accuracy = accuracy_score(y_pred, y_test)
    print("accuracy is ", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="knn", accuracy=accuracy)
Example No. 17
def main():
    iris = datasets.load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 18
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 19
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 20
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 21
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 22
def main():
    iris = datasets.load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 23
def main():
    # Load the diabetes dataset
    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=3,
                                    noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        cross_validation_sets = k_fold_cross_validation_sets(X_train,
                                                             y_train,
                                                             k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print("\tMean Squared Error: %s (regularization: %s)" %
              (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" %
          (lowest_error, best_reg_factor))
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
Example No. 24
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    model = PolynomialRidgeRegression(reg_factor=0.1,
                                      degree=3,
                                      n_iterations=100,
                                      learning_rate=0.001)

    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n),
                         model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X,
             y_pred_line,
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("PolynomialRegression Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
Example No. 25
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(X_train, y_train, n_iterations=4000, learning_rate=0.01)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 26
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    
    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)
Example No. 27
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Perceptron
    clf = Perceptron()
    clf.fit(X_train, y_train, plot_errors=True)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 28
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 29
def main():

    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 30
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 31
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 32
def main():

    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Print the mean squared error
    print "Mean Squared Error:", mean_squared_error(y_test, y_pred)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
Example No. 33
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)
Example No. 34
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Perceptron
    clf = Perceptron()
    clf.fit(X_train, y_train, plot_errors=True)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 35
def main():

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = PolynomialRegression(degree=6, n_iterations=100000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Generate data for prediction line
    X_pred_ = np.arange(0, 1, 0.001).reshape((1000, 1))
    y_pred_ = clf.predict(X=X_pred_)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p = plt.plot(366 * X_pred_,
                 y_pred_,
                 color="black",
                 linewidth=2,
                 label="Prediction")
    plt.suptitle("Polynomial Regression")
    plt.title("MSE: %.2f" % mse)
    plt.xlabel('Days')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')

    plt.show()
Example No. 36
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    y[y == 1] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
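The example stops after computing the accuracy. A minimal continuation in the style of the other Adaboost examples here (printing the score and projecting the test set to 2D with PCA) could be:

    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)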
Example No. 37
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
Example No. 38
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
Example No. 39
def main():
    # Load the diabetes dataset
    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=3, noise=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for regularization_factor in np.arange(0, 0.3, 0.001):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = RidgeRegression(delta=regularization_factor)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print ("\tMean Squared Error: %s (regularization: %s)" % (mse, regularization_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = regularization_factor
            lowest_error = mse

    # Make final prediction
    clf = RidgeRegression(delta=best_reg_factor)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor))
    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Ridge Regression (%.2f MSE)" % mse)
    plt.show()
Example No. 40
def main():

    X, y = datasets.make_regression(n_features=1, n_samples=200, bias=100, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = LinearRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Print the mean squared error
    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.plot(X_test[:, 0], y_pred, color='blue', linewidth=3)
    plt.title("Linear Regression (%.2f MSE)" % mse)
    plt.show()
Example No. 41
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.01, 
        early_stopping=True,
        plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
Example No. 42
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
Example No. 43
def main():
    df = pd.read_csv(dir_path + "/../data/iris.csv")
    # Change class labels from strings to numbers
    df = df.replace(to_replace="setosa", value="-1")
    df = df.replace(to_replace="virginica", value="1")
    df = df.replace(to_replace="versicolor", value="2")

    # Only select data for two classes
    X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix()
    y = df.loc[df['species'] != "2"]["species"].as_matrix()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    
    # Adaboost classification
    clf = Adaboost(n_clf = 8)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
Example No. 44
def main():

    print ("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)  

    clf = XGBoost(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred,
                   title="XGBoost",
                   accuracy=accuracy,
                   legend_labels=data.target_names)
Example No. 45
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
Example No. 46
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    y[y == 1] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
Example No. 47
    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.early_stopping:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        n_outputs = np.shape(y_train)[1]

        # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
        a = -1 / math.sqrt(n_features)
        b = -a
        self.W = (b - a) * np.random.random((n_features, n_outputs)) + a
        self.biasW = (b - a) * np.random.random((1, n_outputs)) + a

        training_errors = []
        validation_errors = []
        iter_with_rising_val_error = 0
        for i in range(self.n_iterations):
            # Calculate outputs
            neuron_input = np.dot(X_train, self.W) + self.biasW
            neuron_output = sigmoid(neuron_input)

            # Training error
            error = y_train - neuron_output
            mse = np.mean(np.power(error, 2))
            training_errors.append(mse)

            # Calculate the loss gradient
            w_gradient = -2 * (y_train - neuron_output) * \
                sigmoid_gradient(neuron_input)
            bias_gradient = w_gradient

            # Update weights
            self.W -= self.learning_rate * X_train.T.dot(w_gradient)
            self.biasW -= self.learning_rate * \
                np.ones((1, n_samples)).dot(bias_gradient)

            if self.early_stopping:
                # Calculate the validation error
                error = y_validate - self._calculate_output(X_validate)
                mse = np.mean(np.power(error, 2))
                validation_errors.append(mse)

                # If the validation error is larger than the previous iteration increase
                # the counter
                if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                    iter_with_rising_val_error += 1
                    # If the validation error has been for more than 50 iterations
                    # stop training to avoid overfitting
                    if iter_with_rising_val_error > 50:
                        break
                else:
                    iter_with_rising_val_error = 0

        # Plot the training error
        if self.plot_errors:
            if self.early_stopping:
                # Training and validation error plot
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                validation, = plt.plot(range(i+1), validation_errors, label="Validation Error")
                plt.legend(handles=[training, validation])
            else:
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                plt.legend(handles=[training])
            plt.title("Error Plot")
            plt.ylabel('Error')
            plt.xlabel('Iterations')
            plt.show()
Example No. 48
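This snippet starts mid-script, so the lines that load the data and pick out the two digit classes are not shown. Judging from Example No. 3, which uses the same digit1/digit2 pattern, the omitted setup presumably looks roughly like the following sketch (an assumption, not part of the original snippet):

data = datasets.load_digits()
digit1, digit2 = 1, 8  # example values; the original snippet's choice is not shown
idx = np.append(np.where(data.target == digit1)[0], np.where(data.target == digit2)[0])
y = data.target[idx]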
y[y == digit1] = 0
y[y == digit2] = 1
X = data.data[idx]
X = normalize(X)

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5) # Reduce to 5 dimensions


# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
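The listing ends after the models are constructed. A plausible continuation, following the fit/predict conventions used throughout these examples (Adaboost trained on the rescaled {-1, 1} labels, KNN predicting directly from the training data), might be:

# .......
#  TRAIN
# .......
adaboost.fit(X_train, rescaled_y_train)
naive_bayes.fit(X_train, y_train)
logistic_regression.fit(X_train, y_train)
mlp.fit(X_train, y_train)
perceptron.fit(X_train, y_train)
decision_tree.fit(X_train, y_train)
random_forest.fit(X_train, y_train)

# .........
#  PREDICT
# .........
y_pred = {}
y_pred["Adaboost"] = adaboost.predict(X_test)
y_pred["Naive Bayes"] = naive_bayes.predict(X_test)
y_pred["K Nearest Neighbors"] = knn.predict(X_test, X_train, y_train)
y_pred["Logistic Regression"] = logistic_regression.predict(X_test)
y_pred["Multilayer Perceptron"] = mlp.predict(X_test)
y_pred["Perceptron"] = perceptron.predict(X_test)
y_pred["Decision Tree"] = decision_tree.predict(X_test)
y_pred["Random Forest"] = random_forest.predict(X_test)

# ..........
#  ACCURACY
# ..........
for clf_name, pred in y_pred.items():
    # Adaboost predicts labels in {-1, 1}, so compare against the rescaled test labels
    truth = rescaled_y_test if clf_name == "Adaboost" else y_test
    print("%s accuracy: %s" % (clf_name, accuracy_score(truth, pred)))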