def assignment3p1():
    """Run assignment 3.1: Bayes classification on PCA-reduced datasets.

    For every entry of ``dataset.DatasetNames`` the data is loaded and split
    (``train_fraction=0.5``, ``seed=0``). A 2-component PCA is fitted on the
    training split only and then applied to both splits. A non-naive Bayes
    classifier is trained on the projected training samples and evaluated on
    the projected test samples.

    One figure per dataset is shown with two panels: the test samples
    coloured by their true labels, and the same samples coloured by the
    predicted labels together with the classifier's decision boundaries.
    """
    print('Assignment 3.1')
    print('Use PCA to reduce the datasets to 2D and plot the boundaries.')

    title_template = ('Assignment 3.1\n'
                      'Dataset: {}\n'
                      'Accuracy: {:.3f}')

    for name in dataset.DatasetNames:
        all_samples, all_labels = dataset.load_dataset(name)
        split = dataset.split_dataset(samples=all_samples,
                                      labels=all_labels,
                                      train_fraction=0.5,
                                      balance_classes=True,
                                      seed=0)
        train_x, train_y, test_x, test_y = split

        # The projection is learned from the training split only; the test
        # split is merely transformed with it.
        reducer = decomposition.PCA(n_components=2)
        reducer.fit(train_x)
        train_x = reducer.transform(train_x)
        test_x = reducer.transform(test_x)

        model = bc.BayesClassifier.train(samples=train_x,
                                         labels=train_y,
                                         naive=False)
        predicted = model.classify(samples=test_x)
        accuracy = bc.evaluate_accuracy(predictions=predicted,
                                        labels=test_y)

        # Plot the classification of the test samples: one panel per
        # labelling; only the prediction panel gets decision boundaries.
        figure, (truth_ax, prediction_ax) = plt.subplots(1, 2)
        panels = (
            (truth_ax, test_y, 'Ground truth', False),
            (prediction_ax, predicted, 'Prediction', True),
        )
        for axis, panel_labels, panel_title, draw_boundaries in panels:
            plotting.plot_samples_2d(ax=axis,
                                     samples=test_x,
                                     labels=panel_labels)
            plotting.plot_gaussians(ax=axis,
                                    labels=panel_labels,
                                    mu=model.mu,
                                    sigma=model.sigma)
            if draw_boundaries:
                plotting.plot_boundaries(ax=axis,
                                         classifier=model,
                                         grid_size=1000)
            axis.legend()
            axis.set_title(panel_title)

        figure.suptitle(title_template.format(name.value, accuracy))
        plt.show()
        plt.close()
def assignment1():
    """Run assignment 1: maximum-likelihood fit of a synthetic dataset.

    Generates 200 two-feature samples around 5 centers with ``make_blobs``
    (``random_state=0``), estimates ``mu`` and ``sigma`` from the samples and
    their labels with ``bc.maximum_likelihood_estimator`` (``naive=False``),
    and shows a single figure with the samples and the estimated Gaussians.
    """
    print('Assignment 1')
    print('Compute the Maximum Likelihood estimation '
          'of a synthetic Gaussian dataset.')

    # Generate a dataset.
    blob_settings = dict(n_samples=200,
                         centers=5,
                         n_features=2,
                         random_state=0)
    points, point_labels = make_blobs(**blob_settings)

    estimate = bc.maximum_likelihood_estimator(points,
                                               point_labels,
                                               naive=False)
    est_mu, est_sigma = estimate

    plt.figure()
    axis = plt.gca()
    plotting.plot_samples_2d(ax=axis, samples=points, labels=point_labels)
    plotting.plot_gaussians(ax=axis,
                            labels=point_labels,
                            mu=est_mu,
                            sigma=est_sigma)
    plt.title('Assignment 1')
    axis.legend()
    plt.show()
    plt.close()
def assignment6p1():
    """Run assignment 6.1: single vs. boosted decision tree boundaries.

    For every entry of ``dataset.DatasetNames`` the data is loaded and split
    (``train_fraction=0.7``, ``seed=0``). A 2-component PCA is fitted on the
    training split only and applied to both splits, so that no information
    from the test samples leaks into the projection (the same procedure as
    in ``assignment3p1``). A single ``SklearnDecisionTreeClassifierWrapper``
    and a ``BoostClassifier`` built from it (``num_iters=10``) are trained
    on the projected training samples and scored on the projected test
    samples.

    One figure per dataset is shown with three panels: ground truth, weak
    classifier predictions with boundaries, and boosted classifier
    predictions with boundaries.
    """
    print('Assignment 6.1')
    print('Boosted trees, using Sklearn implementation: '
          'plot the boundaries.')

    for dataset_name in dataset.DatasetNames:
        samples, labels = dataset.load_dataset(dataset_name)

        (training_samples, training_labels, test_samples,
         test_labels) = dataset.split_dataset(samples=samples,
                                              labels=labels,
                                              train_fraction=0.7,
                                              balance_classes=True,
                                              seed=0)

        # Fit the projection on the training split only; fitting it on the
        # full dataset would let the test samples influence the features.
        pca = decomposition.PCA(n_components=2)
        pca.fit(training_samples)
        training_samples = pca.transform(training_samples)
        test_samples = pca.transform(test_samples)

        weak_classifier = \
            tree_cls.SklearnDecisionTreeClassifierWrapper.train(
                samples=training_samples,
                labels=training_labels,
                weights=None)
        weak_predictions = weak_classifier.classify(samples=test_samples)
        weak_accuracy = np.mean(weak_predictions == test_labels)

        boost_classifier = boost_cls.BoostClassifier.train(
            classifier_class=tree_cls.SklearnDecisionTreeClassifierWrapper,
            samples=training_samples,
            labels=training_labels,
            num_iters=10)
        boost_predictions = boost_classifier.classify(samples=test_samples)
        boost_accuracy = np.mean(boost_predictions == test_labels)

        # Plot the classification of the test samples.
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
        plotting.plot_samples_2d(ax=ax1,
                                 samples=test_samples,
                                 labels=test_labels)
        ax1.legend()
        ax1.set_title('Ground truth')

        plotting.plot_samples_2d(ax=ax2,
                                 samples=test_samples,
                                 labels=weak_predictions)
        plotting.plot_boundaries(ax=ax2,
                                 classifier=weak_classifier,
                                 grid_size=1000)
        ax2.legend()
        ax2.set_title('Weak classifier')

        plotting.plot_samples_2d(ax=ax3,
                                 samples=test_samples,
                                 labels=boost_predictions)
        plotting.plot_boundaries(ax=ax3,
                                 classifier=boost_classifier,
                                 grid_size=1000)
        ax3.legend()
        ax3.set_title('Boosted classifier')

        # The title names this assignment (it previously read
        # 'Assignment 5.2', which did not match the function).
        fig.suptitle('Assignment 6.1\n'
                     'Dataset: {}\n'
                     'Weak classifier accuracy: {:.3f}\n'
                     'Boosted classifier accuracy: {:.3f}'.format(
                         dataset_name.value, weak_accuracy, boost_accuracy))

        plt.show()
        plt.close()
def assignment2():
    """Run assignment 2: Bayes classification of a synthetic dataset.

    Generates 400 two-feature samples around 5 centers with ``make_blobs``
    (``random_state=0``); the first 200 are used for training and the
    remaining 200 for testing. A non-naive ``bc.BayesClassifier`` is trained,
    its accuracy on both parts is printed, and a two-panel figure of the
    test samples is shown: true labels on the left, predicted labels with
    decision boundaries on the right.
    """
    print('Assignment 2')
    print('Compute the prior and classify using the Bayesian rule.')

    half = 200

    # Generate a dataset.
    points, point_labels = make_blobs(n_samples=2 * half,
                                      centers=5,
                                      n_features=2,
                                      random_state=0)

    # First half for training, second half for testing.
    train_part = slice(None, half)
    test_part = slice(half, None)
    train_x, train_y = points[train_part], point_labels[train_part]
    test_x, test_y = points[test_part], point_labels[test_part]

    model = bc.BayesClassifier.train(samples=train_x,
                                     labels=train_y,
                                     naive=False)

    # Evaluate on the training data.
    train_predicted = model.classify(train_x)
    train_score = bc.evaluate_accuracy(predictions=train_predicted,
                                       labels=train_y)
    print('Training accuracy: {:.3f}'.format(train_score))

    # Evaluate on the test data.
    test_predicted = model.classify(test_x)
    test_score = bc.evaluate_accuracy(predictions=test_predicted,
                                      labels=test_y)
    print('Test accuracy: {:.3f}'.format(test_score))

    # Plot the classification of the test samples: one panel per labelling;
    # only the prediction panel gets decision boundaries.
    figure, (truth_ax, prediction_ax) = plt.subplots(1, 2)
    panels = (
        (truth_ax, test_y, 'Ground truth', False),
        (prediction_ax, test_predicted, 'Prediction', True),
    )
    for axis, panel_labels, panel_title, draw_boundaries in panels:
        plotting.plot_samples_2d(ax=axis,
                                 samples=test_x,
                                 labels=panel_labels)
        plotting.plot_gaussians(ax=axis,
                                labels=panel_labels,
                                mu=model.mu,
                                sigma=model.sigma)
        if draw_boundaries:
            plotting.plot_boundaries(ax=axis,
                                     classifier=model,
                                     grid_size=1000)
        axis.legend()
        axis.set_title(panel_title)

    title_template = ('Assignment 2 - Test samples\n'
                      'Accuracy: {:.3f}')
    figure.suptitle(title_template.format(test_score))
    plt.show()
    plt.close()