import numpy as np
from matplotlib import pyplot as plt
from sklearn import decomposition
from sklearn.datasets import make_blobs

# Local project modules. The file names behind the bc / tree_cls / boost_cls
# aliases are assumed from how they are used below.
import dataset
import plotting
import bayes_classifier as bc
import boost_classifier as boost_cls
import tree_classifier as tree_cls


def assignment3p1():
    print('Assignment 3.1')
    print('Use PCA to reduce the datasets to 2D and plot the boundaries.')
    for dataset_name in dataset.DatasetNames:
        samples, labels = dataset.load_dataset(dataset_name)
        training_samples, training_labels, test_samples, test_labels = \
            dataset.split_dataset(samples=samples, labels=labels,
                                  train_fraction=0.5, balance_classes=True,
                                  seed=0)
        # Fit the PCA on the training split only, then project both splits.
        pca = decomposition.PCA(n_components=2)
        pca.fit(training_samples)
        training_samples = pca.transform(training_samples)
        test_samples = pca.transform(test_samples)
        classifier = bc.BayesClassifier.train(samples=training_samples,
                                              labels=training_labels,
                                              naive=False)
        test_predictions = classifier.classify(samples=test_samples)
        test_accuracy = bc.evaluate_accuracy(predictions=test_predictions,
                                             labels=test_labels)
        # Plot the classification of the test samples.
        fig, (ax1, ax2) = plt.subplots(1, 2)
        plotting.plot_samples_2d(ax=ax1, samples=test_samples,
                                 labels=test_labels)
        plotting.plot_gaussians(ax=ax1, labels=test_labels,
                                mu=classifier.mu, sigma=classifier.sigma)
        ax1.legend()
        ax1.set_title('Ground truth')
        plotting.plot_samples_2d(ax=ax2, samples=test_samples,
                                 labels=test_predictions)
        plotting.plot_gaussians(ax=ax2, labels=test_predictions,
                                mu=classifier.mu, sigma=classifier.sigma)
        plotting.plot_boundaries(ax=ax2, classifier=classifier,
                                 grid_size=1000)
        ax2.legend()
        ax2.set_title('Prediction')
        fig.suptitle('Assignment 3.1\n'
                     'Dataset: {}\n'
                     'Accuracy: {:.3f}'.format(dataset_name.value,
                                               test_accuracy))
        plt.show()
        plt.close()

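# Illustrative sketch (assumption): plotting.plot_boundaries is not shown in this
# file; it is assumed to evaluate the classifier on a dense 2D grid and draw the
# resulting label regions. A minimal stand-alone version of that idea could look
# like the helper below (name and signature are hypothetical, not the real module).
def _plot_boundaries_sketch(ax, classifier, samples, grid_size=200):
    # Span a regular grid slightly larger than the bounding box of the samples.
    x_min, y_min = samples.min(axis=0) - 1.0
    x_max, y_max = samples.max(axis=0) + 1.0
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, grid_size),
                         np.linspace(y_min, y_max, grid_size))
    grid = np.column_stack([xx.ravel(), yy.ravel()])
    # Classify every grid point and draw the predicted label as a filled contour.
    zz = classifier.classify(samples=grid).reshape(xx.shape)
    ax.contourf(xx, yy, zz, alpha=0.2)
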
def assignment1():
    print('Assignment 1')
    print('Compute the Maximum Likelihood estimation '
          'of a synthetic Gaussian dataset.')
    # Generate a dataset.
    samples, labels = make_blobs(n_samples=200, centers=5, n_features=2,
                                 random_state=0)
    mu, sigma = bc.maximum_likelihood_estimator(samples, labels, naive=False)
    plt.figure()
    ax = plt.gca()
    plotting.plot_samples_2d(ax=ax, samples=samples, labels=labels)
    plotting.plot_gaussians(ax=ax, labels=labels, mu=mu, sigma=sigma)
    plt.title('Assignment 1')
    ax.legend()
    plt.show()
    plt.close()

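# Illustrative sketch (assumption): bc.maximum_likelihood_estimator is defined
# elsewhere; it is assumed to return the per-class ML estimates, i.e. the sample
# mean and (biased) sample covariance of each class. The helper below is only a
# stand-alone illustration of that computation, not the bc module itself.
def _maximum_likelihood_sketch(samples, labels):
    classes = np.unique(labels)
    # Mean vectors, shape (num_classes, num_features).
    mu = np.stack([samples[labels == c].mean(axis=0) for c in classes])
    # ML covariance divides by N (bias=True); one matrix per class.
    sigma = np.stack([np.cov(samples[labels == c], rowvar=False, bias=True)
                      for c in classes])
    return mu, sigma
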
def assignment6p1():
    print('Assignment 6.1')
    print('Boosted trees, using Sklearn implementation: '
          'plot the boundaries.')
    for dataset_name in dataset.DatasetNames:
        samples, labels = dataset.load_dataset(dataset_name)
        pca = decomposition.PCA(n_components=2)
        pca.fit(samples)
        samples = pca.transform(samples)
        (training_samples, training_labels,
         test_samples, test_labels) = dataset.split_dataset(
            samples=samples, labels=labels, train_fraction=0.7,
            balance_classes=True, seed=0)
        # Train a single decision tree as the weak baseline.
        weak_classifier = \
            tree_cls.SklearnDecisionTreeClassifierWrapper.train(
                samples=training_samples, labels=training_labels,
                weights=None)
        weak_predictions = weak_classifier.classify(samples=test_samples)
        weak_accuracy = np.mean(weak_predictions == test_labels)
        # Boost the same weak learner for 10 rounds.
        boost_classifier = boost_cls.BoostClassifier.train(
            classifier_class=tree_cls.SklearnDecisionTreeClassifierWrapper,
            samples=training_samples, labels=training_labels, num_iters=10)
        boost_predictions = boost_classifier.classify(samples=test_samples)
        boost_accuracy = np.mean(boost_predictions == test_labels)
        # Plot the classification of the test samples.
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
        plotting.plot_samples_2d(ax=ax1, samples=test_samples,
                                 labels=test_labels)
        ax1.legend()
        ax1.set_title('Ground truth')
        plotting.plot_samples_2d(ax=ax2, samples=test_samples,
                                 labels=weak_predictions)
        plotting.plot_boundaries(ax=ax2, classifier=weak_classifier,
                                 grid_size=1000)
        ax2.legend()
        ax2.set_title('Weak classifier')
        plotting.plot_samples_2d(ax=ax3, samples=test_samples,
                                 labels=boost_predictions)
        plotting.plot_boundaries(ax=ax3, classifier=boost_classifier,
                                 grid_size=1000)
        ax3.legend()
        ax3.set_title('Boosted classifier')
        fig.suptitle('Assignment 6.1\n'
                     'Dataset: {}\n'
                     'Weak classifier accuracy: {:.3f}\n'
                     'Boosted classifier accuracy: {:.3f}'.format(
                         dataset_name.value, weak_accuracy, boost_accuracy))
        plt.show()
        plt.close()

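# Illustrative sketch (assumption): boost_cls.BoostClassifier is assumed to follow
# the discrete AdaBoost scheme, i.e. each round trains a weak learner on weighted
# samples and then increases the weight of the samples it misclassified. The
# fragment below only documents that assumed weight update; it is hypothetical
# and not the code of the boost_cls module.
def _adaboost_weight_update_sketch(weights, predictions, labels):
    # Weighted training error of the current weak learner (clipped to stay finite).
    error = np.clip(np.sum(weights[predictions != labels]), 1e-10, 1.0 - 1e-10)
    # Voting weight (alpha) of this weak learner.
    alpha = 0.5 * (np.log(1.0 - error) - np.log(error))
    # Down-weight correct samples, up-weight misclassified ones, then renormalise.
    weights = weights * np.exp(np.where(predictions == labels, -alpha, alpha))
    return weights / np.sum(weights), alpha
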
def assignment2():
    print('Assignment 2')
    print('Compute the prior and classify using the Bayesian rule.')
    dataset_size = 200
    # Generate a dataset.
    samples, labels = make_blobs(n_samples=2 * dataset_size, centers=5,
                                 n_features=2, random_state=0)
    training_samples = samples[:dataset_size]
    test_samples = samples[dataset_size:]
    training_labels = labels[:dataset_size]
    test_labels = labels[dataset_size:]
    bayes_classifier = bc.BayesClassifier.train(samples=training_samples,
                                                labels=training_labels,
                                                naive=False)
    # Evaluate on the training data.
    training_predictions = bayes_classifier.classify(training_samples)
    training_accuracy = bc.evaluate_accuracy(predictions=training_predictions,
                                             labels=training_labels)
    print('Training accuracy: {:.3f}'.format(training_accuracy))
    # Evaluate on the test data.
    test_predictions = bayes_classifier.classify(test_samples)
    test_accuracy = bc.evaluate_accuracy(predictions=test_predictions,
                                         labels=test_labels)
    print('Test accuracy: {:.3f}'.format(test_accuracy))
    # Plot the classification of the test samples.
    fig, (ax1, ax2) = plt.subplots(1, 2)
    plotting.plot_samples_2d(ax=ax1, samples=test_samples, labels=test_labels)
    plotting.plot_gaussians(ax=ax1, labels=test_labels,
                            mu=bayes_classifier.mu,
                            sigma=bayes_classifier.sigma)
    ax1.legend()
    ax1.set_title('Ground truth')
    plotting.plot_samples_2d(ax=ax2, samples=test_samples,
                             labels=test_predictions)
    plotting.plot_gaussians(ax=ax2, labels=test_predictions,
                            mu=bayes_classifier.mu,
                            sigma=bayes_classifier.sigma)
    plotting.plot_boundaries(ax=ax2, classifier=bayes_classifier,
                             grid_size=1000)
    ax2.legend()
    ax2.set_title('Prediction')
    fig.suptitle('Assignment 2 - Test samples\n'
                 'Accuracy: {:.3f}'.format(test_accuracy))
    plt.show()
    plt.close()

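# Illustrative sketch (assumption): bc.BayesClassifier.classify is assumed to apply
# the standard Bayesian decision rule, picking the class that maximises the log
# posterior log p(x | c) + log p(c) under the fitted Gaussians and estimated priors.
# The helper below is a hypothetical stand-alone version of that rule, not the bc
# module itself.
def _bayes_rule_sketch(samples, mu, sigma, priors):
    from scipy.stats import multivariate_normal
    log_posteriors = np.stack(
        [multivariate_normal.logpdf(samples, mean=mu[c], cov=sigma[c])
         + np.log(priors[c])
         for c in range(len(priors))], axis=1)
    # Choose the class with the largest log posterior for each sample.
    return np.argmax(log_posteriors, axis=1)


# The assignment functions above are assumed to be run from a simple script entry
# point such as this one; the selection and order of the calls is a guess.
if __name__ == '__main__':
    assignment1()
    assignment2()
    assignment3p1()
    assignment6p1()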