Esempio n. 1
0
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Decision Tree",
                   accuracy=accuracy,
                   legend_labels=data.target_names)

    print("-- Regression Tree --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Regression Tree (%.2f MSE)" % mse)
    plt.show()
def main():

    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Gradient Boosting",
                      accuracy=accuracy,
                      legend_labels=data.target_names)

    print("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=150,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
Esempio n. 3
0
def main():

    print("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = XGBoost(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="XGBoost",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    n_samples, n_features = np.shape(X)
    n_hidden, n_output = 50, 10

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    optimizer = GradientDescent_(learning_rate=0.0001, momentum=0.3)

    # MLP
    clf = MultilayerPerceptron(n_iterations=6000,
                            batch_size=200,
                            optimizer=optimizer,
                            val_error=True)

    clf.add(DenseLayer(n_inputs=n_features, n_units=n_hidden))
    clf.add(DenseLayer(n_inputs=n_hidden, n_units=n_hidden))
    clf.add(DenseLayer(n_inputs=n_hidden, n_units=n_output, activation_function=Softmax))  
    
    clf.fit(X_train, y_train)
    clf.plot_errors()

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Optimization method for finding weights that minimizes loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        activation_function=ELU,
        optimizer=optimizer,
        early_stopping=True,
        plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
Esempio n. 6
0
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Logistic Regression",
                      accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
        n_iterations=1000,
        learning_rate=0.0001, 
        momentum=0.8,
        activation_function=ExpLU,
        early_stopping=True,
        plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
Esempio n. 8
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Optimization method for finding weights that minimizes loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
Esempio n. 9
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Perceptron",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
Esempio n. 10
0
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    print("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=150,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
                               n_iterations=1000,
                               learning_rate=0.01)

    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Multilayer Perceptron",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
def main():

    print ("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, 
        title="Decision Tree", 
        accuracy=accuracy, 
        legend_labels=data.target_names)

    print ("-- Regression Tree --")

    X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)


    mse = mean_squared_error(y_test, y_pred)

    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Regression Tree (%.2f MSE)" % mse)
    plt.show()
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=5)
    y_pred = clf.predict(X_test, X_train, y_train)
    
    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)
Esempio n. 16
0
    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.val_error:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(
                X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        n_batches = int(n_samples / self.batch_size)

        bar = progressbar.ProgressBar(widgets=bar_widgets)

        for i in bar(range(self.n_iterations)):
            X_, y_ = shuffle_data(X_train, y_train)

            batch_t_error = 0  # Mean batch training error
            batch_v_error = 0  # Mean batch validation error
            for idx in np.array_split(np.arange(n_samples), n_batches):
                X_batch, y_batch = X_[idx], y_[idx]

                # Calculate output
                y_pred = self._forward_pass(X_batch)

                # Calculate the cross entropy training loss
                loss = np.mean(self.cross_ent.loss(y_batch, y_pred))
                batch_t_error += loss

                loss_grad = self.cross_ent.gradient(y_batch, y_pred)

                # Update the NN weights
                self._backward_pass(loss_grad=loss_grad)

                if self.val_error:
                    # Calculate the validation error
                    y_val_pred = self._forward_pass(X_validate)
                    loss = np.mean(self.cross_ent.loss(y_validate, y_val_pred))
                    batch_v_error += loss

            batch_t_error /= n_batches
            batch_v_error /= n_batches
            self.errors["training"].append(batch_t_error)
            self.errors["validation"].append(batch_v_error)
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
Esempio n. 19
0
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
Esempio n. 20
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=5)
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="K Nearest Neighbors",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
Esempio n. 21
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.001, 
        activation_function=Sigmoid)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
Esempio n. 22
0
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
def main():

    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = time  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    clf = LinearRegression()

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    y_pred_line = clf.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X,
             y_pred_line,
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main():

    print("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    y_pred_line = clf.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
Esempio n. 26
0
def main():

    print ("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, 
        title="Decision Tree", 
        accuracy=accuracy, 
        legend_labels=data.target_names)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)


    Plot().plot_in_2d(X_test, y_pred, 
        title="Gradient Boosting", 
        accuracy=accuracy, 
        legend_labels=data.target_names)
Esempio n. 30
0
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Random Forest",
                   accuracy=accuracy,
                   legend_labels=data.target_names)
Esempio n. 31
0
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    n_samples = np.shape(X)
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=1)

    optimizer = Adam()

    #-----
    # MLP
    #-----

    # clf = NeuralNetwork(optimizer=optimizer,
    #                     loss=CrossEntropy,
    #                     validation_data=(X_test, y_test))

    # clf.add(Dense(n_hidden, input_shape=(8*8,)))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(10))
    # clf.add(Activation('softmax'))

    # clf.fit(X_train, y_train, n_iterations=50, batch_size=256)
    # clf.plot_errors()

    # y_pred = np.argmax(clf.predict(X_test), axis=1)

    # accuracy = accuracy_score(y_test, y_pred)
    # print ("Accuracy:", accuracy)

    #----------
    # Conv Net
    #----------

    X_train = X_train.reshape((-1, 1, 8, 8))
    X_test = X_test.reshape((-1, 1, 8, 8))

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(
        Conv2D(n_filters=16,
               filter_shape=(3, 3),
               input_shape=(1, 8, 8),
               padding='same'))
    clf.add(Activation('relu'))
    clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Flatten())
    clf.add(Dense(128))
    clf.add(Activation('relu'))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    clf.fit(X_train, y_train, n_iterations=50, batch_size=256)
    clf.plot_errors()

    y_pred = np.argmax(clf.predict(X_test), axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    X_test = X_test.reshape(-1, 8 * 8)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Convolutional Neural Network",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
def main():

    optimizer = Adam()

    def gen_mult_ser(nums):
        """ Method which generates multiplication series """
        X = np.zeros([nums, 10, 61], dtype=float)
        y = np.zeros([nums, 10, 61], dtype=float)
        for i in range(nums):
            start = np.random.randint(2, 7)
            mult_ser = np.linspace(start, start * 10, num=10, dtype=int)
            X[i] = to_categorical(mult_ser, n_col=61)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    def gen_num_seq(nums):
        """ Method which generates sequence of numbers """
        X = np.zeros([nums, 10, 20], dtype=float)
        y = np.zeros([nums, 10, 20], dtype=float)
        for i in range(nums):
            start = np.random.randint(0, 10)
            num_seq = np.arange(start, start + 10)
            X[i] = to_categorical(num_seq, n_col=20)
            y[i] = np.roll(X[i], -1, axis=0)
        y[:, -1, 1] = 1  # Mark endpoint as 1
        return X, y

    X, y = gen_mult_ser(3000)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Model definition
    clf = NeuralNetwork(optimizer=optimizer, loss=CrossEntropy)
    clf.add(RNN(10, activation="tanh", bptt_trunc=5, input_shape=(10, 61)))
    clf.add(Activation('softmax'))
    clf.summary("RNN")

    # Print a problem instance and the correct solution
    tmp_X = np.argmax(X_train[0], axis=1)
    tmp_y = np.argmax(y_train[0], axis=1)
    print("Number Series Problem:")
    print("X = [" + " ".join(tmp_X.astype("str")) + "]")
    print("y = [" + " ".join(tmp_y.astype("str")) + "]")
    print()

    train_err, _ = clf.fit(X_train, y_train, n_epochs=500, batch_size=512)

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=2)
    y_test = np.argmax(y_test, axis=2)

    print()
    print("Results:")
    for i in range(5):
        # Print a problem instance and the correct solution
        tmp_X = np.argmax(X_test[i], axis=1)
        tmp_y1 = y_test[i]
        tmp_y2 = y_pred[i]
        print("X      = [" + " ".join(tmp_X.astype("str")) + "]")
        print("y_true = [" + " ".join(tmp_y1.astype("str")) + "]")
        print("y_pred = [" + " ".join(tmp_y2.astype("str")) + "]")
        print()

    accuracy = np.mean(accuracy_score(y_test, y_pred))
    print("Accuracy:", accuracy)

    training = plt.plot(range(500), train_err, label="Training Error")
    plt.title("Error Plot")
    plt.ylabel('Training Error')
    plt.xlabel('Iterations')
    plt.show()
    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.early_stopping:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        n_outputs = np.shape(y_train)[1]

        # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
        a = -1 / math.sqrt(n_features)
        b = -a
        self.W = (b - a) * np.random.random((n_features, self.n_hidden)) + a
        self.w0 = (b - a) * np.random.random((1, self.n_hidden)) + a
        self.V = (b - a) * np.random.random((self.n_hidden, n_outputs)) + a
        self.v0 = (b - a) * np.random.random((1, n_outputs)) + a

        # Error history
        training_errors = []
        validation_errors = []
        iter_with_rising_val_error = 0

        for i in range(self.n_iterations):

            # Calculate hidden layer
            hidden_layer_input = X_train.dot(self.W) + self.w0
            hidden_layer_output = self.activation.function(hidden_layer_input)
            # Calculate output layer
            output_layer_input = hidden_layer_output.dot(self.V) + self.v0
            output = self.activation.function(output_layer_input)

            # Calculate the error
            error = y_train - output
            mse = np.mean(np.power(error, 2))
            training_errors.append(mse)

            # Calculate the loss gradient
            output_gradient = -2 * (y_train - output) * \
                self.activation.gradient(output_layer_input)
            hidden_gradient = output_gradient.dot(
                self.V.T) * self.activation.gradient(hidden_layer_input)

            # Calcualte the gradient with respect to each weight term
            grad_wrt_v = hidden_layer_output.T.dot(output_gradient)
            grad_wrt_v0 = np.ones((1, n_samples)).dot(output_gradient)
            grad_wrt_w = X_train.T.dot(hidden_gradient)
            grad_wrt_w0 = np.ones((1, n_samples)).dot(hidden_gradient)

            # Update weights
            # Move against the gradient to minimize loss
            self.V          = self.v_opt.update(w=self.V, grad_wrt_w=grad_wrt_v)
            self.v0      = self.v0_opt.update(w=self.v0, grad_wrt_w=grad_wrt_v0)
            self.W          = self.w_opt.update(w=self.W, grad_wrt_w=grad_wrt_w)
            self.w0      = self.w0_opt.update(w=self.w0, grad_wrt_w=grad_wrt_w0)

            if self.early_stopping:
                # Calculate the validation error
                error = y_validate - self._calculate_output(X_validate)
                mse = np.mean(np.power(error, 2))
                validation_errors.append(mse)

                # If the validation error is larger than the previous iteration increase
                # the counter
                if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                    iter_with_rising_val_error += 1
                    # # If the validation error has been for more than 50 iterations
                    # # stop training to avoid overfitting
                    if iter_with_rising_val_error > 50:
                        break
                else:
                    iter_with_rising_val_error = 0


        # Plot the training error
        if self.plot_errors:
            if self.early_stopping:
                # Training and validation error plot
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                validation, = plt.plot(range(i+1), validation_errors, label="Validation Error")
                plt.legend(handles=[training, validation])
            else:
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                plt.legend(handles=[training])
            plt.title("Error Plot")
            plt.ylabel('Error')
            plt.xlabel('Iterations')
            plt.show()
Esempio n. 34
0
X = normalize(X)

print("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

n_samples, n_features = np.shape(X)

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
mlp.add(Dense(input_shape=(n_features, ), n_units=64))
mlp.add(Activation('relu'))
mlp.add(Dense(n_units=64))
Esempio n. 35
0
X = normalize(X)

print ("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5) # Reduce to 5 dimensions

n_samples, n_features = np.shape(X)

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescaled labels {-1, 1}
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = NeuralNetwork(optimizer=Adam(), 
                    loss=CrossEntropy)
mlp.add(Dense(input_shape=(n_features,), n_units=64))
mlp.add(Activation('relu'))
Esempio n. 36
0
    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.early_stopping:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(
                X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        n_outputs = np.shape(y_train)[1]

        # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
        a = -1 / math.sqrt(n_features)
        b = -a
        self.W = (b - a) * np.random.random((n_features, n_outputs)) + a
        self.w0 = (b - a) * np.random.random((1, n_outputs)) + a

        # Error history
        training_errors = []
        validation_errors = []
        iter_with_rising_val_error = 0

        # Lambda function that calculates the neuron outputs
        neuron_output = lambda w, b: self.activation.function(
            np.dot(X_train, w) + b)

        # Lambda function that calculates the loss gradient
        loss_grad = lambda w, b: -2 * (y_train - neuron_output(w, b)) * \
            self.activation.gradient(np.dot(X_train, w) + b)

        # Lambda functions that calculates the gradient of the loss with
        # respect to each weight term. Allows for computation of loss gradient at different
        # coordinates.
        grad_func_wrt_w = lambda w: X_train.T.dot(loss_grad(w, self.w0))
        grad_func_wrt_w0 = lambda b: np.ones(
            (1, n_samples)).dot(loss_grad(self.W, b))

        # Optimize paramaters for n_iterations
        for i in range(self.n_iterations):

            # Training error
            error = y_train - neuron_output(self.W, self.w0)
            mse = np.mean(np.power(error, 2))
            training_errors.append(mse)

            # Update weights
            self.W = self.w_opt.update(w=self.W, grad_func=grad_func_wrt_w)
            self.w0 = self.w0_opt.update(w=self.w0, grad_func=grad_func_wrt_w0)

            if self.early_stopping:
                # Calculate the validation error
                error = y_validate - self._calculate_output(X_validate)
                mse = np.mean(np.power(error, 2))
                validation_errors.append(mse)

                # If the validation error is larger than the previous iteration increase
                # the counter
                if len(validation_errors
                       ) > 1 and validation_errors[-1] > validation_errors[-2]:
                    iter_with_rising_val_error += 1
                    # If the validation error has been for more than 50 iterations
                    # stop training to avoid overfitting
                    if iter_with_rising_val_error > 50:
                        break
                else:
                    iter_with_rising_val_error = 0

        # Plot the training error
        if self.plot_errors:
            if self.early_stopping:
                # Training and validation error plot
                training, = plt.plot(range(i + 1),
                                     training_errors,
                                     label="Training Error")
                validation, = plt.plot(range(i + 1),
                                       validation_errors,
                                       label="Validation Error")
                plt.legend(handles=[training, validation])
            else:
                training, = plt.plot(range(i + 1),
                                     training_errors,
                                     label="Training Error")
                plt.legend(handles=[training])
            plt.title("Error Plot")
            plt.ylabel('Error')
            plt.xlabel('Iterations')
            plt.show()
def main():

    optimizer = Adam()

    #-----
    # MLP
    #-----

    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Convert to one-hot encoding
    y = to_categorical(y.astype("int"))

    n_samples = np.shape(X)
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=1)

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Dense(n_hidden, input_shape=(8 * 8, )))
    clf.add(Activation('leaky_relu'))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(n_hidden))
    clf.add(Activation('leaky_relu'))
    clf.add(Dropout(0.25))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="MLP")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    # Predict labels of the test data
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Multilayer Perceptron",
                      accuracy=accuracy,
                      legend_labels=range(10))
Esempio n. 38
0
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    n_samples = np.shape(X)
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=1)

    optimizer = Adam()

    #-----
    # MLP
    #-----

    # clf = NeuralNetwork(optimizer=optimizer,
    #                     loss=CrossEntropy,
    #                     validation_data=(X_test, y_test))

    # clf.add(Dense(n_hidden, input_shape=(8*8,)))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(10))
    # clf.add(Activation('softmax'))

    # print ()
    # clf.summary(name="MLP")

    # clf.fit(X_train, y_train, n_epochs=50, batch_size=256)
    # clf.plot_errors()

    # y_pred = np.argmax(clf.predict(X_test), axis=1)

    # accuracy = accuracy_score(y_test, y_pred)
    # print ("Accuracy:", accuracy)

    #----------
    # Conv Net
    #----------

    X_train = X_train.reshape((-1, 1, 8, 8))
    X_test = X_test.reshape((-1, 1, 8, 8))

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(
        Conv2D(n_filters=16,
               filter_shape=(3, 3),
               input_shape=(1, 8, 8),
               padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Conv2D(n_filters=32, filter_shape=(3, 3), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.25))
    clf.add(BatchNormalization())
    clf.add(Flatten())
    clf.add(Dense(256))
    clf.add(Activation('relu'))
    clf.add(Dropout(0.5))
    clf.add(BatchNormalization())
    clf.add(Dense(10))
    clf.add(Activation('softmax'))

    print()
    clf.summary(name="ConvNet")

    train_err, val_err = clf.fit(X_train, y_train, n_epochs=50, batch_size=256)

    # Training and validation error plot
    n = len(train_err)
    training, = plt.plot(range(n), train_err, label="Training Error")
    validation, = plt.plot(range(n), val_err, label="Validation Error")
    plt.legend(handles=[training, validation])
    plt.title("Error Plot")
    plt.ylabel('Error')
    plt.xlabel('Iterations')
    plt.show()

    # Make a prediction of the test set
    y_pred = np.argmax(clf.predict(X_test), axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    X_test = X_test.reshape(-1, 8 * 8)

    # Reduce dimension to 2D using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Convolutional Neural Network",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
def main():

    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    # Prior parameters
    # - Weights are assumed distr. according to a Normal distribution
    # - The variance of the weights are assumed distributed according to 
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    # Normal
    mu0 = np.array([0] * n_features)
    omega0 = np.diag([.0001] * n_features)
    # Scaled inverse chi-squared
    nu0 = 1
    sigma_sq0 = 100

    # The credible interval
    cred_int = 10

    clf = BayesianRegression(n_draws=2000, 
        poly_degree=4, 
        mu0=mu0, 
        omega0=omega0, 
        nu0=nu0, 
        sigma_sq0=sigma_sq0,
        cred_int=cred_int)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Get prediction line
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print ("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p1 = plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
    p2 = plt.plot(366 * X, y_lower_, color="gray", linewidth=2, label="{0}% Credible Interval".format(cred_int))
    p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')
    # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.legend(loc='lower right')

    plt.show()
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    n_samples = np.shape(X)
    n_hidden = 512

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=1)

    optimizer = Adam()

    #-----
    # MLP
    #-----

    # clf = NeuralNetwork(optimizer=optimizer,
    #                     loss=CrossEntropy,
    #                     validation_data=(X_test, y_test))

    # clf.add(Dense(n_hidden, input_shape=(8*8,)))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(n_hidden))
    # clf.add(Activation('leaky_relu'))
    # clf.add(Dropout(0.25))
    # clf.add(Dense(10))
    # clf.add(Activation('softmax'))
    
    # clf.fit(X_train, y_train, n_iterations=50, batch_size=256)
    # clf.plot_errors()

    # y_pred = np.argmax(clf.predict(X_test), axis=1)

    # accuracy = accuracy_score(y_test, y_pred)
    # print ("Accuracy:", accuracy)

    #----------
    # Conv Net
    #----------

    X_train = X_train.reshape((-1,1,8,8))
    X_test = X_test.reshape((-1,1,8,8))

    clf = NeuralNetwork(optimizer=optimizer,
                        loss=CrossEntropy,
                        validation_data=(X_test, y_test))

    clf.add(Conv2D(n_filters=16, filter_shape=(3,3), input_shape=(1,8,8), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Conv2D(n_filters=32, filter_shape=(3,3), padding='same'))
    clf.add(Activation('relu'))
    clf.add(Flatten())
    clf.add(Dense(128))
    clf.add(Activation('relu'))
    clf.add(Dense(10))
    clf.add(Activation('softmax'))
    
    clf.fit(X_train, y_train, n_iterations=50, batch_size=256)
    clf.plot_errors()

    y_pred = np.argmax(clf.predict(X_test), axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print ("Accuracy:", accuracy)

    X_test = X_test.reshape(-1, 8*8)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Convolutional Neural Network", accuracy=accuracy, legend_labels=np.unique(y))
Esempio n. 41
0
def main():

    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    n_samples, n_features = np.shape(X)

    # Prior parameters
    # - Weights are assumed distr. according to a Normal distribution
    # - The variance of the weights are assumed distributed according to
    #   a scaled inverse chi-squared distribution.
    # High prior uncertainty!
    # Normal
    mu0 = np.array([0] * n_features)
    omega0 = np.diag([.0001] * n_features)
    # Scaled inverse chi-squared
    nu0 = 1
    sigma_sq0 = 100

    # The credible interval
    cred_int = 10

    clf = BayesianRegression(n_draws=2000,
                             poly_degree=4,
                             mu0=mu0,
                             omega0=omega0,
                             nu0=nu0,
                             sigma_sq0=sigma_sq0,
                             cred_int=cred_int)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    # Get prediction line
    y_pred_, y_lower_, y_upper_ = clf.predict(X=X, eti=True)

    # Print the mean squared error
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    p1 = plt.plot(366 * X,
                  y_pred_,
                  color="black",
                  linewidth=2,
                  label="Prediction")
    p2 = plt.plot(366 * X,
                  y_lower_,
                  color="gray",
                  linewidth=2,
                  label="{0}% Credible Interval".format(cred_int))
    p3 = plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend(loc='lower right')
    # plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.legend(loc='lower right')

    plt.show()
Esempio n. 42
0
def main():

    # Load temperature data
    data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = data["temp"].as_matrix()

    X = time # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 11

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for reg_factor in np.arange(0, 0.1, 0.01):
        cross_validation_sets = k_fold_cross_validation_sets(
            X_train, y_train, k=k)
        mse = 0
        for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
            clf = PolynomialRidgeRegression(degree=poly_degree, 
                                            reg_factor=reg_factor,
                                            learning_rate=0.001,
                                            n_iterations=10000)
            clf.fit(_X_train, _y_train)
            y_pred = clf.predict(_X_test)
            _mse = mean_squared_error(_y_test, y_pred)
            mse += _mse
        mse /= k

        # Print the mean squared error
        print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = reg_factor
            lowest_error = mse

    # Make final prediction
    clf = PolynomialRidgeRegression(degree=poly_degree, 
                                    reg_factor=best_reg_factor,
                                    learning_rate=0.001,
                                    n_iterations=10000)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print ("Mean squared error: %s (given by reg. factor: %s)" % (lowest_error, best_reg_factor))

    y_pred_line = clf.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.early_stopping:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        n_outputs = np.shape(y_train)[1]

        # Initial weights between [-1/sqrt(N), 1/sqrt(N)]
        a = -1 / math.sqrt(n_features)
        b = -a
        self.W = (b - a) * np.random.random((n_features, n_outputs)) + a
        self.w0 = (b - a) * np.random.random((1, n_outputs)) + a

        # Error history
        training_errors = []
        validation_errors = []
        iter_with_rising_val_error = 0

        # Lambda function that calculates the neuron outputs
        neuron_output = lambda w, b: self.activation.function(np.dot(X_train, w) + b)

        # Lambda function that calculates the loss gradient
        loss_grad = lambda w, b: -2 * (y_train - neuron_output(w, b)) * \
            self.activation.gradient(np.dot(X_train, w) + b)

        # Lambda functions that calculates the gradient of the loss with 
        # respect to each weight term. Allows for computation of loss gradient at different
        # coordinates.
        grad_func_wrt_w = lambda w: X_train.T.dot(loss_grad(w, self.w0))
        grad_func_wrt_w0 = lambda b: np.ones((1, n_samples)).dot(loss_grad(self.W, b))

        # Optimize paramaters for n_iterations
        for i in range(self.n_iterations):

            # Training error
            error = y_train - neuron_output(self.W, self.w0)
            mse = np.mean(np.power(error, 2))
            training_errors.append(mse)

            # Update weights
            self.W = self.w_opt.update(w=self.W, grad_func=grad_func_wrt_w)
            self.w0 = self.w0_opt.update(w=self.w0, grad_func=grad_func_wrt_w0)

            if self.early_stopping:
                # Calculate the validation error
                error = y_validate - self._calculate_output(X_validate)
                mse = np.mean(np.power(error, 2))
                validation_errors.append(mse)

                # If the validation error is larger than the previous iteration increase
                # the counter
                if len(validation_errors) > 1 and validation_errors[-1] > validation_errors[-2]:
                    iter_with_rising_val_error += 1
                    # If the validation error has been for more than 50 iterations
                    # stop training to avoid overfitting
                    if iter_with_rising_val_error > 50:
                        break
                else:
                    iter_with_rising_val_error = 0

        # Plot the training error
        if self.plot_errors:
            if self.early_stopping:
                # Training and validation error plot
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                validation, = plt.plot(range(i+1), validation_errors, label="Validation Error")
                plt.legend(handles=[training, validation])
            else:
                training, = plt.plot(range(i+1), training_errors, label="Training Error")
                plt.legend(handles=[training])
            plt.title("Error Plot")
            plt.ylabel('Error')
            plt.xlabel('Iterations')
            plt.show()