Example #1
def train_and_run_dtree(decision_tree, X_train, X_test, y_train, y_test,
                        format_string, should_print_tree):
    decision_tree.fit(X_train, y_train)
    # Calculate accuracy on the test set
    y_pred = decision_tree.predict(X_test)
    acc = accuracy(y_pred, y_test)

    if should_print_tree:
        decision_tree.print_tree()
    class_estimation_graph(2, X_test, y_test, y_pred,
                           format_string.format(100 * acc))
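The `accuracy` helper used throughout these examples is never shown. A minimal sketch of what it presumably does, assuming it simply compares two equal-length label arrays (only the name and call signature come from the examples; the body is an assumption):

import numpy as np

def accuracy(y_pred, y_true):
    # Assumed helper: fraction of predictions that match the true labels.
    return np.mean(np.asarray(y_pred) == np.asarray(y_true))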
Example #2
def linearly_separable():
    X, y = create_linearly_separable_two_class()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    classifier = SVM(Kernel.gaussian_kernel(sigma=1))
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(
        2, X_test, y_test, y_pred,
        "SVM linear %.2f%% Accuracy on Linearly Separable" % (acc * 100))
Example #3
def main():
    n_classes = 4
    # Uses two features so the classes are easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)
    
    k = 4
    classifier = KNN_Classification(k=k)
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)
    
    class_estimation_graph(n_classes, X_test, y_test, y_pred,
                           "KNN %.2f%% Accuracy" % (acc*100))
Example #4
def with_data_error_with_slack():
    n_classes = 2
    # Uses two features so the classes are easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=2,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)
    classifier = SVM(Kernel.gaussian_kernel(sigma=1), C=1)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(n_classes, X_test, y_test, y_pred,
                           "SVM linear %.2f%% Accuracy" % (acc * 100))
Example #5
def main(_=None):
    # Uses just one feature to make it easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=1,
                                        n_informative=1,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = LogisticRegressionTF()
    logistic_reg.fit(X_train, y_train)

    y_pred_probability = logistic_reg.predict(X_test)
    y_pred_probability = np.squeeze(y_pred_probability)
    mse = mean_square_error(y_pred_probability, y_test)

    logistic_reg.set_classification_boundary(0.5)
    y_pred_classified = logistic_reg.predict(X_test)
    y_pred_classified = np.squeeze(y_pred_classified)
    acc = accuracy(y_pred_classified, y_test)

    plt.figure()
    plt.scatter(X_test, y_test, color="Black", label="Actual")
    plt.scatter(X_test,
                y_pred_probability,
                color="Red",
                label="Classification Probability")
    plt.scatter(X_test,
                y_pred_classified,
                color="Blue",
                label="Rounded Prediction")
    plt.legend(loc='center right', fontsize=8)
    plt.title("Logistic Regression %.2f MSE, %.2f%% Accuracy" %
              (mse, acc * 100))
    plt.show()
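`mean_square_error` is another unshown helper. A sketch, assuming it mirrors `accuracy` above and averages squared differences between the predicted probabilities and the 0/1 labels:

import numpy as np

def mean_square_error(y_pred, y_true):
    # Assumed helper: mean of squared prediction errors.
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    return np.mean((y_pred - y_true) ** 2)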
Example #6
    def runBinaryClassification(self, algorithm, expected_accuracy=0.95):
        """
        The algorithm should have been initialized with any additional checkers
        available for its learning method - like have the optimizer wrapped by
        NumericGradientChecker.
        
        Will be predicting for a single class - is it that class or not?
        """
        # Very simple dataset, only has 2 classes, 2 features, and no error
        X_train = binary_class_X['train']
        y_train = binary_class_y['train']

        algorithm.fit(X_train, y_train)

        X_test = binary_class_X['test']
        y_test = binary_class_y['test']

        # Round just in case the algorithm returns likelihoods
        y_pred = np.round(algorithm.predict(X_test))

        # Expect high accuracy due to no noise, simple data set, large # of samples
        self.assertGreater(accuracy(y_pred, y_test), expected_accuracy)
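The fixtures `binary_class_X` and `binary_class_y` are module-level dicts built elsewhere. A sketch of how such a fixture could be generated with the same tools the other examples use (the sample count and exact parameters are assumptions; the class, feature, and no-noise settings come from the comment above):

from sklearn import datasets

# No label noise (flip_y=0), 2 classes, 2 features, many samples.
X, y = datasets.make_classification(n_samples=1000,
                                    n_features=2,
                                    n_informative=2,
                                    n_redundant=0,
                                    flip_y=0,
                                    n_classes=2)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_proportion=0.2)
binary_class_X = {'train': X_train, 'test': X_test}
binary_class_y = {'train': y_train, 'test': y_test}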
Example #7
    def runMultiClassClassification(self, algorithm, expected_accuracy=0.90):
        """
        The algorithm should have been initialized with any additional checkers
        available for its learning method - like have the optimizer wrapped by
        NumericGradientChecker.
        
        The algorithm MUST be able to handle multi class inputs
        
        Will be predicting for a single class - is it that class or not?
        """
        # Very simple dataset, only has 4 classes, 2 features, and no error
        X_train = multi_class_X['train']
        y_train = multi_class_y['train']

        algorithm.fit(X_train, y_train)

        X_test = multi_class_X['test']
        y_test = multi_class_y['test']

        y_pred = algorithm.predict(X_test)

        # Expect high accuracy due to no noise, simple data set, large # of samples
        self.assertGreater(accuracy(y_pred, y_test), expected_accuracy)
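`class_estimation_graph` is the plotting helper shared by most of these examples. Its signature is visible from the call sites, and the title in Example #8 below ("Shape is true class, color is estimate") reveals the encoding. A sketch under those assumptions:

import numpy as np
import matplotlib.pyplot as plt

def class_estimation_graph(n_classes, X, y_true, y_pred, title):
    # Assumed helper: marker shape encodes the true class,
    # marker color encodes the predicted class.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    markers = ['o', 's', '^', 'D', 'v', '*']
    colors = plt.cm.tab10.colors
    plt.figure()
    for true_class in range(n_classes):
        for pred_class in range(n_classes):
            mask = (y_true == true_class) & (y_pred == pred_class)
            if mask.any():
                plt.scatter(X[mask, 0], X[mask, 1],
                            marker=markers[true_class % len(markers)],
                            color=colors[pred_class % len(colors)])
    plt.title(title)
    plt.show()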
Example #8
def main():
    n_classes = 4
    # Uses two features so the classes are easy to graph.
    X, y = datasets.make_classification(n_samples=200,
                                        n_features=2,
                                        n_informative=2,
                                        n_redundant=0,
                                        n_clusters_per_class=1,
                                        flip_y=0.1,
                                        n_classes=n_classes)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_proportion=0.2)

    logistic_reg = OneVsAllClassification(CreateDefaultLogisticRegression)
    logistic_reg.fit(X_train, y_train)

    y_pred = logistic_reg.predict(X_test)
    acc = accuracy(y_pred, y_test)

    class_estimation_graph(
        n_classes, X_test, y_test, y_pred,
        "Logistic Regression %.2f%% Accuracy.\nShape is true class, color is estimate"
        % (acc * 100))
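`OneVsAllClassification` receives a factory (`CreateDefaultLogisticRegression`) rather than an instance, which suggests it constructs one binary classifier per class. A minimal one-vs-rest sketch under that assumption (all implementation details are assumptions):

import numpy as np

class OneVsAllClassification:
    def __init__(self, create_classifier):
        # Assumed API: store the factory; build one classifier per class in fit().
        self.create_classifier = create_classifier

    def fit(self, X, y):
        self.classes = np.unique(y)
        self.classifiers = []
        for c in self.classes:
            clf = self.create_classifier()
            # Relabel as a binary problem: 1 for "this class", 0 otherwise.
            clf.fit(X, (y == c).astype(int))
            self.classifiers.append(clf)

    def predict(self, X):
        # One column of scores per class; pick the most confident classifier.
        scores = np.column_stack([np.squeeze(clf.predict(X))
                                  for clf in self.classifiers])
        return self.classes[np.argmax(scores, axis=1)]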