def train_from_labels(prior=None, show=True):
    """Fit the mixture model using the labeled MNIST data.

    There are 10 mixture components, one corresponding to each of the digit
    classes. The responsibilities are set to a one-hot encoding of the labels
    and a single M-step (``update_pi`` / ``update_theta``) is applied.

    Args:
        prior: Prior passed to ``Model.random_initialization``. When ``None``,
            ``Prior.default_prior()`` is used.
        show: When true, visualize the components and predictions and print
            the average training / test log-likelihood.

    Returns:
        The fitted model.
    """
    X = util.read_mnist_images(TRAIN_IMAGES_FILE)
    y = util.read_mnist_labels(TRAIN_LABELS_FILE)
    X_test = util.read_mnist_images(TEST_IMAGES_FILE)
    num_data = X.shape[0]

    if prior is None:
        prior = Prior.default_prior()
    model = Model.random_initialization(prior, 10, IMAGE_DIM**2)

    # One-hot responsibilities: row i has a 1 in the column of its label.
    R = np.zeros((num_data, 10))
    R[np.arange(num_data), y] = 1.
    model.params.pi = model.update_pi(R)
    model.params.theta = model.update_theta(X, R)

    # mask which includes top half of pixels
    M = np.zeros(X.shape, dtype=int)
    M[:, :M.shape[1] // 2] = 1

    if show:
        model.visualize_components()
        try:
            model.visualize_predictions(X[:64, :], M[:64, :])
        except Exception:
            # Best-effort: the predictive distribution may not be implemented
            # yet. Catch Exception (not a bare except) so Ctrl-C and
            # SystemExit still propagate.
            print('Posterior predictive distribution not implemented yet.')

        print('Training log-likelihood:', model.log_likelihood(X) / num_data)
        print('Test log-likelihood:',
              model.log_likelihood(X_test) / X_test.shape[0])

    return model
def print_log_probs_by_digit_class(model):
    """Report the mean log-probability the model assigns to each digit class.

    Loads the MNIST training and test images/labels, then for each split and
    each digit 0-9 prints ``model.log_likelihood`` of that digit's images
    divided by the number of such images. Finally hands the model and the
    training data to ``for_q3``.
    """
    train_images = util.read_mnist_images(TRAIN_IMAGES_FILE)
    train_labels = util.read_mnist_labels(TRAIN_LABELS_FILE)
    test_images = util.read_mnist_images(TEST_IMAGES_FILE)
    test_labels = util.read_mnist_labels(TEST_LABELS_FILE)

    line_template = 'Average log-probability of a {} image: {:1.3f}'
    splits = (
        ('Training set', train_images, train_labels),
        ('Test set', test_images, test_labels),
    )
    for heading, images, labels in splits:
        print(heading)
        for digit in range(10):
            # Rows whose label equals the current digit.
            subset = images[labels == digit, :]
            average = model.log_likelihood(subset) / subset.shape[0]
            print(line_template.format(digit, average))
        print()

    # Call some extra useful things to answer Conceptual Q3
    for_q3(model, train_images, train_labels)
def check_e_step():
    """Check the E-step updates.

    Verifies that the responsibilities returned by ``compute_posterior``
    maximize the variational objective: the rows must sum to 1, and none of
    20 random perturbations of R may score higher than R itself.

    Note that this does not fully check your solution to Part 2, since it
    only applies to fully observed images.

    Prints a one-line verdict; returns nothing.
    """
    np.random.seed(0)

    NUM_IMAGES = 100
    X = util.read_mnist_images(mixture.TRAIN_IMAGES_FILE)
    X = X[:NUM_IMAGES, :]
    model = mixture.train_from_labels(show=False)

    # reduce the number of observations so that the posterior is less peaked
    X = X[:, ::50]
    model.params.theta = model.params.theta[:, ::50]

    R = model.compute_posterior(X)
    opt = variational_objective(model, X, R, model.params.pi,
                                model.params.theta)

    if not np.allclose(R.sum(1), 1.):
        print('Uh-oh. Rows of R do not seem to sum to 1.')
    else:
        ok = True
        # Run every trial (no early exit) so the number of perturb_R calls,
        # and hence the random state afterwards, matches the original.
        for _ in range(20):
            new_R = perturb_R(R)
            new_obj = variational_objective(model, X, new_R, model.params.pi,
                                            model.params.theta)
            if new_obj > opt:
                ok = False

        if ok:
            print('The E-step seems OK.')
        else:
            print('Something seems to be wrong with the E-step.')
def train_with_em(num_components=100, num_steps=50, prior=None, draw_every=1):
    """Fit the mixture model without labels by alternating E- and M-steps.

    Args:
        num_components: Number of mixture components to initialize.
        num_steps: Number of E-M iterations to run.
        prior: Prior passed to ``Model.random_initialization``; defaults to
            ``Prior.default_prior()`` when ``None``.
        draw_every: Refresh the visualizations and the log-likelihood plot
            every this many iterations.

    Returns:
        The fitted model.
    """
    train_images = util.read_mnist_images(TRAIN_IMAGES_FILE)
    test_images = util.read_mnist_images(TEST_IMAGES_FILE)
    num_data, num_pixels = train_images.shape

    if prior is None:
        prior = Prior.default_prior()
    model = Model.random_initialization(prior, num_components, num_pixels)

    # mask which includes top half of pixels
    mask = np.zeros(train_images.shape, dtype=int)
    half = mask.shape[1] // 2
    mask[:, :half] = 1

    history = []
    for step in range(1, num_steps + 1):
        # E-step: responsibilities under the current parameters.
        resp = model.compute_posterior(train_images)

        # M-step: re-estimate parameters from the responsibilities.
        model.params.pi = model.update_pi(resp)
        model.params.theta = model.update_theta(train_images, resp)

        history.append(model.log_likelihood(train_images) / num_data)

        if step % draw_every != 0:
            continue

        model.visualize_components()
        model.visualize_predictions(train_images[:64, :], mask[:64, :])

        pylab.figure('Log-likelihood')
        pylab.clf()
        pylab.semilogx(np.arange(1, step + 1), history)
        pylab.title('Log-likelihood')
        pylab.xlabel('Number of E-M steps')
        pylab.draw()

    print('Final training log-likelihood:',
          model.log_likelihood(train_images) / num_data)
    print('Final test log-likelihood:',
          model.log_likelihood(test_images) / test_images.shape[0])

    pylab.show()
    return model
def print_log_probs_by_digit_class(model):
    """Print the per-digit average log-probability on both MNIST splits.

    For the training set and then the test set, prints a heading, one line
    per digit 0-9 giving ``model.log_likelihood`` of that digit's images
    divided by their count, and a trailing blank line.
    """
    def report(heading, images, labels):
        # Emit one section: heading, ten per-digit lines, blank line.
        print(heading)
        for digit in range(10):
            chosen = images[labels == digit, :]
            mean_loglik = model.log_likelihood(chosen) / chosen.shape[0]
            print('Average log-probability of a {} image: {:1.3f}'.format(
                digit, mean_loglik))
        print()

    # Load everything up front, before any output is produced.
    train_x = util.read_mnist_images(TRAIN_IMAGES_FILE)
    train_y = util.read_mnist_labels(TRAIN_LABELS_FILE)
    test_x = util.read_mnist_images(TEST_IMAGES_FILE)
    test_y = util.read_mnist_labels(TEST_LABELS_FILE)

    report('Training set', train_x, train_y)
    report('Test set', test_x, test_y)
def print_part_2_values():
    """Print reference values used to check the Part 2 implementation.

    Trains a model from the labels, builds a mask with a 1 in every 50th
    pixel column, and prints selected entries of ``compute_posterior`` and
    ``posterior_predictive_means`` evaluated with that mask.
    """
    model = train_from_labels(show=False)
    images = util.read_mnist_images(TRAIN_IMAGES_FILE)

    mask = np.zeros(images.shape, dtype=int)
    mask[:, ::50] = 1

    resp = model.compute_posterior(images, mask)
    pred_means = model.posterior_predictive_means(images, mask)

    # (label, array, index) triples; indexing is deferred to print time.
    checks = (
        ('R[0, 2]', resp, (0, 2)),
        ('R[1, 0]', resp, (1, 0)),
        ('P[0, 183]', pred_means, (0, 183)),
        ('P[2, 628]', pred_means, (2, 628)),
    )
    for label, array, index in checks:
        print(label, array[index])
def check_m_step():
    """Check the M-step updates.

    Verifies that ``update_theta`` and ``update_pi`` maximize the variational
    objective with respect to the model parameters: for random
    responsibilities R, none of 20 random perturbations of theta (and, if pi
    sums to 1, none of 20 perturbations of pi) may score higher than the
    updated values.

    Prints one verdict for theta and one for pi; returns nothing.
    """
    np.random.seed(0)

    NUM_IMAGES = 100
    X = util.read_mnist_images(mixture.TRAIN_IMAGES_FILE)
    X = X[:NUM_IMAGES, :]

    # Random responsibilities, normalized so each row sums to 1.
    R = np.random.uniform(size=(NUM_IMAGES, 10))
    R /= R.sum(1).reshape((-1, 1))

    model = mixture.Model.random_initialization(
        mixture.Prior.default_prior(), 10, 784)
    theta = model.update_theta(X, R)
    pi = model.update_pi(R)

    opt = variational_objective(model, X, R, pi, theta)

    ok = True
    # Run every trial (no early exit) so the random stream is consumed
    # exactly as in the original.
    for _ in range(20):
        new_theta = perturb_theta(theta)
        new_obj = variational_objective(model, X, R, pi, new_theta)
        if new_obj > opt:
            ok = False
    if ok:
        print('The theta update seems OK.')
    else:
        print('Something seems to be wrong with the theta update.')

    if not np.allclose(np.sum(pi), 1.):
        print('Uh-oh. pi does not seem to sum to 1.')
    else:
        ok = True
        for _ in range(20):
            new_pi = perturb_pi(pi)
            new_obj = variational_objective(model, X, R, new_pi, theta)
            if new_obj > opt:
                ok = False
        if ok:
            print('The pi update seems OK.')
        else:
            print('Something seems to be wrong with the pi update.')
def print_part_1_values():
    """Print reference values used to check the Part 1 implementation.

    Uses the first 50 training images with ``Prior(2., 3., 4.)`` and soft
    responsibilities (0.91 for the labeled class, 0.01 elsewhere), applies
    ``update_pi`` / ``update_theta`` once, and prints selected entries of the
    resulting parameters.
    """
    NUM_IMAGES = 50

    images = util.read_mnist_images(TRAIN_IMAGES_FILE)[:NUM_IMAGES, :]
    labels = util.read_mnist_labels(TRAIN_LABELS_FILE)[:NUM_IMAGES]
    num_rows, _ = images.shape

    model = Model.random_initialization(Prior(2., 3., 4.), 10, IMAGE_DIM**2)

    # Soft one-hot: 0.9 on the labeled class, then 0.01 added everywhere.
    resp = np.zeros((num_rows, 10))
    resp[np.arange(num_rows), labels] = 0.9
    resp += 0.01

    params = model.params
    params.pi = model.update_pi(resp)
    params.theta = model.update_theta(images, resp)

    print('pi[0]', params.pi[0])
    print('pi[1]', params.pi[1])
    print('theta[0, 239]', params.theta[0, 239])
    print('theta[3, 298]', params.theta[3, 298])