def initializeDataMaps(X, y, X_map, y_map):
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    X_map['train'] = X_train
    y_map['train'] = y_train
    X_map['test'] = X_test
    y_map['test'] = y_test
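
initializeDataMaps mutates the two dictionaries in place rather than returning the split. A minimal usage sketch, assuming X and y are already loaded; the model name is a placeholder for any estimator in this collection:

X_map, y_map = {}, {}
initializeDataMaps(X, y, X_map, y_map)
model.fit(X_map['train'], y_map['train'])  # 'model' is a placeholder estimator
predictions = model.predict(X_map['test'])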
Example 2
def main(num_samples=50, points_per_dimension=20):
    X, y = datasets.make_classification(n_samples=num_samples,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=2,
                                        flip_y=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegression(optimizer=GradientDescent(
        num_iterations=20000))
    logistic_reg.fit(X_train, y_train)
    decision_boundary_graph(X_test,
                            y_test,
                            logistic_reg,
                            "Logistic Regression",
                            points_per_dimension=points_per_dimension)

    if svm_able_to_run:
        svm_linear = SVM(Kernel.linear_kernel(), C=1)
        svm_linear.fit(X_train, y_train)
        decision_boundary_graph(X_test,
                                y_test,
                                svm_linear,
                                "SVM - Linear Kernel",
                                points_per_dimension=points_per_dimension)

        svm_gaussian = SVM(Kernel.gaussian_kernel(sigma=2), C=1)
        svm_gaussian.fit(X_train, y_train)
        decision_boundary_graph(X_test,
                                y_test,
                                svm_gaussian,
                                "SVM - Gaussian Kernel",
                                points_per_dimension=points_per_dimension)
    else:
        print("WARNING: cvxopt not installed, SVM will not work.")

    knn_1 = KNN_Classification(k=1)
    knn_1.fit(X_train, y_train)
    knn_3 = KNN_Classification(k=3)
    knn_3.fit(X_train, y_train)

    decision_boundary_graph(X_test,
                            y_test,
                            knn_1,
                            "KNN K=1",
                            points_per_dimension=points_per_dimension)
    decision_boundary_graph(X_test,
                            y_test,
                            knn_3,
                            "KNN K=3",
                            points_per_dimension=points_per_dimension)
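
Kernel.linear_kernel() and Kernel.gaussian_kernel(sigma=...) presumably return callables that the SVM evaluates on pairs of points. A sketch of what such factories could look like, assuming plain numpy; the bodies are illustrative, not this library's actual implementation:

import numpy as np

def linear_kernel():
    # k(x, z) = <x, z>
    return lambda x, z: np.dot(x, z)

def gaussian_kernel(sigma):
    # k(x, z) = exp(-||x - z||^2 / (2 * sigma^2))
    return lambda x, z: np.exp(
        -np.linalg.norm(np.asarray(x) - np.asarray(z)) ** 2 / (2 * sigma ** 2))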
Example 3
def main(should_print_tree=False):
    X, y = create_1d_categorical_feature_regression()

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    # Can't run CART with regression when using categorical variables
    train_and_run_dtree(DecisionTreeRegression(algorithm_to_use='ID3'),
                        X_train, X_test, y_train, y_test,
                        'Decision Tree ID3 (MSE {:.2f})', should_print_tree)
Example 4
def linearly_separable():
    X, y = create_linearly_separable_two_class()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    classifier = SVM(Kernel.gaussian_kernel(sigma=1))
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(
        2, X_test, y_test, y_pred,
        "SVM linear %.2f%% Accuracy on Linearly Separable" % (acc * 100))
Example 5
def main(should_print_tree=False):
    X, y = create_2d_categorical_feature_two_class()

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    train_and_run_dtree(DecisionTreeClassifier(algorithm_to_use='ID3'),
                        X_train, X_test, y_train, y_test,
                        'Decision Tree ID3 (accuracy {:.2f}%)',
                        should_print_tree)
    train_and_run_dtree(DecisionTreeClassifier(algorithm_to_use='CART'),
                        X_train, X_test, y_train, y_test,
                        'Decision Tree CART (accuracy {:.2f}%)',
                        should_print_tree)
Example 6
def main():
    n_classes = 4
    # Just has two features to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200, n_features=2, n_informative=2, n_redundant=0,
                                        n_clusters_per_class=1, flip_y=0.1, n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)
    
    k = 4
    classifier = KNN_Classification(k=k)
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)
    
    class_estimation_graph(n_classes, X_test, y_test, y_pred,
                           "KNN %.2f%% Accuracy" % (acc*100))
Example 7
def main(num_iterations=200, iterations_per_update=20):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=1,
                                        n_informative=1,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegression(optimizer=OptimizerCostGraph(
        GradientDescent(num_iterations=num_iterations),
        iterations_per_update=iterations_per_update))
    logistic_reg.fit(X_train, y_train)
Example 8
def with_data_error_force_accurate():
    n_classes = 2
    # Just has two features to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    classifier = SVM(Kernel.linear_kernel())

    try:
        classifier.fit(X_train, y_train)
        raise RuntimeError("Should not have successfully fit the model")
    except AssertionError:
        # We expect an AssertionError since the problem is non-separable.
        pass
Example 9
def with_data_error_with_slack():
    n_classes = 2
    # Just has two features to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=2,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    classifier = SVM(Kernel.gaussian_kernel(sigma=1), C=1)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(n_classes, X_test, y_test, y_pred,
                           "SVM linear %.2f%% Accuracy" % (acc * 100))
Example 10
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=1,
                                        n_informative=1,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)

    y_pred_probability = logistic_reg.predict(X_test)
    y_pred_probability = np.squeeze(y_pred_probability)
    mse = mean_square_error(y_pred_probability, y_test)

    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = logistic_reg.predict(X_test)
    y_pred_classified = np.squeeze(y_pred_classified)
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test,
                y_pred_probability,
                color="Red",
                label="Classification Probability")
    plt.scatter(X_test,
                y_pred_classified,
                color="Blue",
                label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    plt.title("Logistic Regression %.2f MSE, %.2f%% Accuracy)" %
              (mse, acc * 100))
    plt.show()
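
set_classification_boundary(0.5) switches the model from returning probabilities to returning hard labels. The thresholding itself is one line; a sketch assuming numpy arrays and the convention that values at or above the boundary map to class 1:

import numpy as np

def classify(probabilities, boundary=0.5):
    # Probabilities at or above the boundary become class 1, the rest class 0.
    return (np.asarray(probabilities) >= boundary).astype(int)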
def main(_=None):
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200,
                                    n_features=1,
                                    bias=random.uniform(-10, 10),
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    linear_reg = LinearRegressionTF()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    y_pred = np.squeeze(y_pred)
    mse = mean_square_error(y_pred, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression %.2f MSE)" % (mse))
    plt.show()
def main():
    n_classes = 4
    # Just has two features to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = OneVsAllClassification(CreateDefaultLogisticRegression)
    logistic_reg.fit(X_train, y_train)

    y_pred = logistic_reg.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(
        n_classes, X_test, y_test, y_pred,
        "Logistic Regression %.2f%% Accuracy.\nShape is true class, color is estimate"
        % (acc * 100))
def main():
    # Just using one feature to make it graphable
    X, y = datasets.make_regression(n_samples=200,
                                    n_features=1,
                                    bias=150,
                                    noise=4)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    reg = KNN_Regression(k=4)

    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_test)

    mse = mean_square_error(y_pred, y_test)

    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test, y_pred, color="Red", label="Prediction")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("KNN Regression (%.2f MSE)" % mse)
    plt.show()
def main():
    # Just has one feature to make it easy to graph.
    X, y = datasets.make_regression(n_samples=200, n_features=1,
                                    bias=random.uniform(-10, 10), noise=5)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)
    
    linear_reg = LinearRegression()
    linear_reg.fit(X_train, y_train)
    y_pred = linear_reg.predict(X_test)
    mse = mean_square_error(y_pred, y_test)
    
    linear_reg_w_grad_desc = LinearRegression(optimizer=GradientDescent(num_iterations=2500))
    linear_reg_w_grad_desc.fit(X_train, y_train)
    y_pred_w_grad_desc = linear_reg_w_grad_desc.predict(X_test)
    mse_w_grad_desc = mean_square_error(y_pred_w_grad_desc, y_test)
    
    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.plot(X_test, y_pred, label="Estimate")
    plt.plot(X_test, y_pred_w_grad_desc, label="Estimate using Optimizer")
    plt.legend(loc='lower right', fontsize=8)
    plt.title("Linear Regression %.2f MSE Normal Eq, %.2f MSE Gradient Descent)" % (mse, mse_w_grad_desc))
    plt.show()
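
The two fits above differ only in how the weights are found: the default LinearRegression presumably solves the normal equation in closed form, while the second iterates gradient descent. A sketch of the closed-form step, assuming a bias column is appended to X:

import numpy as np

def normal_equation_fit(X, y):
    # Append a bias column of ones, then solve w = (X^T X)^+ X^T y.
    Xb = np.hstack([np.ones((X.shape[0], 1)), X])
    return np.linalg.pinv(Xb.T @ Xb) @ Xb.T @ y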
        """
        self.fit(X, y)
        return self.transform(X)


if __name__ == "__main__":
    # Only one feature is informative, and that is the one we will graph.
    # With far too many features for the number of samples, the regression
    # will often overfit unless it is pruned.
    X, y = datasets.make_regression(n_samples=100,
                                    n_features=30,
                                    n_informative=1,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    # Without any pruning
    reg_orig = linear_regression.LinearRegression()
    reg_orig.fit(X_train, y_train)
    y_pred_orig = reg_orig.predict(X_test)
    orig_mse = mean_square_error(y_pred_orig, y_test)

    # Set up the pruner and prune the number of features down to 1
    pruner = FeaturePruner(linear_regression.LinearRegression(), 1)
    X_train_pruned = pruner.fit_transform(X_train, y_train)
    X_test_pruned = pruner.transform(X_test)

    reg_pruned = linear_regression.LinearRegression()
    reg_pruned.fit(X_train_pruned, y_train)
    y_pred_pruned = reg_pruned.predict(X_test_pruned)
    pruned_mse = mean_square_error(y_pred_pruned, y_test)

    print("MSE without pruning: %.2f, with pruning: %.2f" % (orig_mse, pruned_mse))
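
FeaturePruner's internals are not shown here. One plausible reading, given that it wraps a LinearRegression and a target feature count, is that it keeps the columns with the largest absolute learned weights; a sketch under that assumption (the weights attribute and the ranking rule are both guesses, not this repo's actual code):

import numpy as np

class WeightMagnitudePruner:
    def __init__(self, model, num_features_to_keep):
        self.model = model
        self.num_features_to_keep = num_features_to_keep

    def fit(self, X, y):
        # Fit the wrapped model, then rank columns by |weight|.
        self.model.fit(X, y)
        weights = np.ravel(self.model.weights)  # assumed attribute name
        self.kept = np.argsort(np.abs(weights))[-self.num_features_to_keep:]

    def transform(self, X):
        return np.asarray(X)[:, self.kept]

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X)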