#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import os
import time

import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers
import zhusuan as zs

# Project-local helpers (data directory config and the semi-supervised MNIST
# loader) from the ZhuSuan examples; adjust the import paths to your layout.
from examples import conf
from examples.utils import dataset


def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, 'mnist.pkl.gz')
    x_labeled, t_labeled, x_unlabeled, x_test, t_test = \
        dataset.load_mnist_semi_supervised(data_path, one_hot=True)
    x_test = np.random.binomial(
        1, x_test, size=x_test.shape).astype('float32')
    n_labeled, n_x = x_labeled.shape
    n_y = 10

    # Define model parameters
    n_z = 100

    # Define training/evaluation parameters
    ll_samples = 10
    beta = 1200.
    epochs = 3000
    batch_size = 100
    test_batch_size = 100
    iters = x_unlabeled.shape[0] // batch_size
    test_iters = x_test.shape[0] // test_batch_size
    test_freq = 10
    learning_rate = 0.0003
    anneal_lr_freq = 200
    anneal_lr_rate = 0.75

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x_orig = tf.placeholder(tf.float32, shape=[None, n_x], name='x')
    # Dynamic binarization: resample binary pixels from the grayscale images.
    x_bin = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_orig), 0, 1), x_orig), tf.int32)

    def log_joint(observed):
        n = tf.shape(observed['x'])[1]
        model = M2(observed, n, n_x, n_y, n_z, n_particles)
        log_px_zy, log_py, log_pz = model.local_log_prob(['x', 'y', 'z'])
        return log_px_zy + log_pz + log_py

    # Labeled
    x_labeled_ph = tf.placeholder(tf.int32, shape=(None, n_x), name='x_l')
    x_labeled_obs = tf.tile(tf.expand_dims(x_labeled_ph, 0),
                            [n_particles, 1, 1])
    y_labeled_ph = tf.placeholder(tf.int32, shape=(None, n_y), name='y_l')
    y_labeled_obs = tf.tile(tf.expand_dims(y_labeled_ph, 0),
                            [n_particles, 1, 1])
    proposal = labeled_proposal(x_labeled_ph, y_labeled_ph, n_z, n_particles)
    qz_samples, log_qz = proposal.query('z', outputs=True,
                                        local_log_prob=True)

    # adapting the proposal
    labeled_klpq_obj = zs.variational.klpq(
        log_joint,
        observed={'x': x_labeled_obs, 'y': y_labeled_obs},
        latent={'z': [qz_samples, log_qz]},
        axis=0)
    labeled_klpq_cost = tf.reduce_mean(labeled_klpq_obj.rws())

    # learning model parameters
    labeled_lower_bound = tf.reduce_mean(
        zs.variational.importance_weighted_objective(
            log_joint,
            observed={'x': x_labeled_obs, 'y': y_labeled_obs},
            latent={'z': [qz_samples, log_qz]},
            axis=0))

    # Unlabeled
    x_unlabeled_ph = tf.placeholder(tf.int32, shape=(None, n_x), name='x_u')
    x_unlabeled_obs = tf.tile(tf.expand_dims(x_unlabeled_ph, 0),
                              [n_particles, 1, 1])
    proposal = unlabeled_proposal(x_unlabeled_ph, n_y, n_z, n_particles)
    qy_samples, log_qy = proposal.query('y', outputs=True,
                                        local_log_prob=True)
    qz_samples, log_qz = proposal.query('z', outputs=True,
                                        local_log_prob=True)

    # adapting the proposal
    unlabeled_klpq_obj = zs.variational.klpq(
        log_joint,
        observed={'x': x_unlabeled_obs},
        latent={'y': [qy_samples, log_qy], 'z': [qz_samples, log_qz]},
        axis=0)
    unlabeled_klpq_cost = tf.reduce_mean(unlabeled_klpq_obj.rws())

    # learning model parameters
    unlabeled_lower_bound = tf.reduce_mean(
        zs.variational.importance_weighted_objective(
            log_joint,
            observed={'x': x_unlabeled_obs},
            latent={'y': [qy_samples, log_qy], 'z': [qz_samples, log_qz]},
            axis=0))

    # Build classifier
    qy_logits_l = qy_x(x_labeled_ph, n_y)
    qy_l = tf.nn.softmax(qy_logits_l)
    pred_y = tf.argmax(qy_l, 1)
    acc = tf.reduce_sum(
        tf.cast(tf.equal(pred_y, tf.argmax(y_labeled_ph, 1)), tf.float32) /
        tf.cast(tf.shape(x_labeled_ph)[0], tf.float32))
    onehot_cat = zs.distributions.OnehotCategorical(qy_logits_l)
    log_qy_x = onehot_cat.log_prob(y_labeled_ph)
    classifier_cost = -beta * tf.reduce_mean(log_qy_x)

    klpq_cost = labeled_klpq_cost + unlabeled_klpq_cost
    model_cost = -labeled_lower_bound - unlabeled_lower_bound

    # Gather gradients
    learning_rate_ph = tf.placeholder(tf.float32, shape=[], name='lr')
    optimizer = tf.train.AdamOptimizer(learning_rate_ph)
    model_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope='model')
    model_grads = optimizer.compute_gradients(model_cost / 2., model_params)
    klpq_grads = optimizer.compute_gradients(klpq_cost / 2.)
    classifier_grads = optimizer.compute_gradients(classifier_cost / 2.)
    infer_op = optimizer.apply_gradients(
        model_grads + klpq_grads + classifier_grads)

    params = tf.trainable_variables()
    for i in params:
        print(i.name, i.get_shape())

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            np.random.shuffle(x_unlabeled)
            lbs_labeled, lbs_unlabeled, train_accs = [], [], []
            for t in range(iters):
                labeled_indices = np.random.randint(0, n_labeled,
                                                    size=batch_size)
                x_labeled_batch = x_labeled[labeled_indices]
                y_labeled_batch = t_labeled[labeled_indices]
                x_unlabeled_batch = x_unlabeled[t * batch_size:
                                                (t + 1) * batch_size]
                x_labeled_batch_bin = sess.run(
                    x_bin, feed_dict={x_orig: x_labeled_batch})
                x_unlabeled_batch_bin = sess.run(
                    x_bin, feed_dict={x_orig: x_unlabeled_batch})
                _, lb_labeled, lb_unlabeled, train_acc = sess.run(
                    [infer_op, labeled_lower_bound, unlabeled_lower_bound,
                     acc],
                    feed_dict={x_labeled_ph: x_labeled_batch_bin,
                               y_labeled_ph: y_labeled_batch,
                               x_unlabeled_ph: x_unlabeled_batch_bin,
                               learning_rate_ph: learning_rate,
                               n_particles: ll_samples})
                lbs_labeled.append(lb_labeled)
                lbs_unlabeled.append(lb_unlabeled)
                train_accs.append(train_acc)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s), Lower bound: labeled = {}, '
                  'unlabeled = {} Accuracy: {:.2f}%'.
                  format(epoch, time_epoch, np.mean(lbs_labeled),
                         np.mean(lbs_unlabeled), np.mean(train_accs) * 100.))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lls_labeled, test_lls_unlabeled, test_accs = [], [], []
                for t in range(test_iters):
                    test_x_batch = x_test[
                        t * test_batch_size: (t + 1) * test_batch_size]
                    test_y_batch = t_test[
                        t * test_batch_size: (t + 1) * test_batch_size]
                    test_ll_labeled, test_ll_unlabeled, test_acc = sess.run(
                        [labeled_lower_bound, unlabeled_lower_bound, acc],
                        feed_dict={x_labeled_ph: test_x_batch,
                                   y_labeled_ph: test_y_batch,
                                   x_unlabeled_ph: test_x_batch,
                                   n_particles: ll_samples})
                    test_lls_labeled.append(test_ll_labeled)
                    test_lls_unlabeled.append(test_ll_unlabeled)
                    test_accs.append(test_acc)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound: labeled = {}, unlabeled = {}'.
                      format(np.mean(test_lls_labeled),
                             np.mean(test_lls_unlabeled)))
                print('>> Test accuracy: {:.2f}%'.format(
                    100. * np.mean(test_accs)))
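# ---------------------------------------------------------------------------
# The script above assumes the generative model `M2` and the two proposals
# are defined elsewhere in the file. The definitions below are a minimal
# sketch consistent with how they are called, written against ZhuSuan's
# classic BayesianNet API. The 500-500 layer sizes and the `group_ndims`
# keyword are assumptions (older ZhuSuan releases spell it
# `group_event_ndims`); treat this as an illustration, not the original
# implementation.
# ---------------------------------------------------------------------------

@zs.reuse('model')
def M2(observed, n, n_x, n_y, n_z, n_particles):
    # p(z) p(y) p(x | y, z): standard normal prior over z, uniform prior
    # over the label y, and an MLP Bernoulli likelihood over binary pixels.
    with zs.BayesianNet(observed=observed) as model:
        z_mean = tf.zeros([n, n_z])
        z = zs.Normal('z', z_mean, std=1., n_samples=n_particles,
                      group_ndims=1)
        y_logits = tf.zeros([n, n_y])
        y = zs.OnehotCategorical('y', y_logits, n_samples=n_particles)
        lx_zy = layers.fully_connected(
            tf.concat([z, tf.to_float(y)], axis=-1), 500)
        lx_zy = layers.fully_connected(lx_zy, 500)
        x_logits = layers.fully_connected(lx_zy, n_x, activation_fn=None)
        zs.Bernoulli('x', x_logits, group_ndims=1)
    return model


@zs.reuse('variational')
def labeled_proposal(x, y, n_z, n_particles):
    # q(z | x, y): amortized diagonal-Gaussian proposal for labeled pairs.
    with zs.BayesianNet() as proposal:
        lz_xy = layers.fully_connected(
            tf.to_float(tf.concat([x, y], axis=-1)), 500)
        lz_xy = layers.fully_connected(lz_xy, 500)
        z_mean = layers.fully_connected(lz_xy, n_z, activation_fn=None)
        z_logstd = layers.fully_connected(lz_xy, n_z, activation_fn=None)
        zs.Normal('z', z_mean, logstd=z_logstd, n_samples=n_particles,
                  group_ndims=1)
    return proposal


@zs.reuse('variational')
def unlabeled_proposal(x, n_y, n_z, n_particles):
    # q(y | x) q(z | x, y): sample a label from the classifier, then propose
    # z given it. Sharing the 'variational' scope reuses the q(z | x, y)
    # network from the labeled branch.
    with zs.BayesianNet() as proposal:
        y_logits = qy_x(x, n_y)
        y = zs.OnehotCategorical('y', y_logits, n_samples=n_particles)
        x_tiled = tf.tile(tf.expand_dims(tf.to_float(x), 0),
                          [n_particles, 1, 1])
        lz_xy = layers.fully_connected(
            tf.concat([x_tiled, tf.to_float(y)], axis=-1), 500)
        lz_xy = layers.fully_connected(lz_xy, 500)
        z_mean = layers.fully_connected(lz_xy, n_z, activation_fn=None)
        z_logstd = layers.fully_connected(lz_xy, n_z, activation_fn=None)
        zs.Normal('z', z_mean, logstd=z_logstd, group_ndims=1)
    return proposal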
@zs.reuse('classifier')
def qy_x(x, n_y):
    ly_x = layers.fully_connected(tf.to_float(x), 500)
    ly_x = layers.fully_connected(ly_x, 500)
    ly_x = layers.fully_connected(ly_x, n_y, activation_fn=None)
    return ly_x


if __name__ == "__main__":
    main()
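# ---------------------------------------------------------------------------
# An alternative implementation of the same semi-supervised M2 model
# follows, ported to ZhuSuan's meta-Bayesian-net API. It relies on a model
# builder `build_gen` under variable scope "gen" and on proposals whose
# networks live under the scopes "qy_x" and "qz_xy" (the scope names come
# from the tf.trainable_variables(scope=...) calls in that listing).
# `build_gen` is sketched here; the remaining helpers are sketched after
# the listing. Decorator and layer choices are assumptions based on
# ZhuSuan 0.4-style APIs, not the original source.
# ---------------------------------------------------------------------------

@zs.meta_bayesian_net(scope="gen", reuse_variables=True)
def build_gen(n, x_dim, n_class, z_dim, n_particles):
    # p(z) p(y) p(x | y, z): the same factorization as M2 above.
    bn = zs.BayesianNet()
    z = bn.normal("z", tf.zeros([n, z_dim]), std=1.,
                  n_samples=n_particles, group_ndims=1)
    y = bn.onehot_categorical("y", tf.zeros([n, n_class]),
                              n_samples=n_particles)
    h = tf.layers.dense(tf.concat([z, tf.to_float(y)], axis=-1), 500,
                        activation=tf.nn.relu)
    h = tf.layers.dense(h, 500, activation=tf.nn.relu)
    x_logits = tf.layers.dense(h, x_dim)
    bn.bernoulli("x", x_logits, group_ndims=1)
    return bn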
def main():
    tf.set_random_seed(1234)
    np.random.seed(1234)

    # Load MNIST
    data_path = os.path.join(conf.data_dir, "mnist.pkl.gz")
    x_labeled, t_labeled, x_unlabeled, x_test, t_test = \
        dataset.load_mnist_semi_supervised(data_path, one_hot=True)
    x_test = np.random.binomial(
        1, x_test, size=x_test.shape).astype(np.float32)
    n_labeled, x_dim = x_labeled.shape
    n_class = 10

    # Define model parameters
    z_dim = 100
    beta = 1200.

    # Build the computation graph
    n = tf.placeholder(tf.int32, shape=[], name="n")
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    model = build_gen(n, x_dim, n_class, z_dim, n_particles)

    # Labeled
    x_labeled_ph = tf.placeholder(tf.float32, shape=[None, x_dim], name="x_l")
    # Binarize dynamically; use a new name so the numpy array `x_labeled`
    # stays accessible for minibatching below.
    x_labeled_obs = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_labeled_ph)), x_labeled_ph),
        tf.int32)
    y_labeled_ph = tf.placeholder(tf.int32, shape=[None, n_class], name="y_l")
    proposal = labeled_proposal(x_labeled_obs, y_labeled_ph, z_dim,
                                n_particles)

    # adapting the proposal
    labeled_klpq_obj = zs.variational.klpq(
        model,
        observed={"x": x_labeled_obs, "y": y_labeled_ph},
        variational=proposal,
        axis=0)
    labeled_q_cost = tf.reduce_mean(labeled_klpq_obj.importance())

    # learning model parameters
    labeled_lower_bound = tf.reduce_mean(
        zs.variational.importance_weighted_objective(
            model,
            observed={"x": x_labeled_obs, "y": y_labeled_ph},
            variational=proposal,
            axis=0))

    # Unlabeled
    x_unlabeled_ph = tf.placeholder(tf.float32, shape=[None, x_dim],
                                    name="x_u")
    x_unlabeled_obs = tf.cast(
        tf.less(tf.random_uniform(tf.shape(x_unlabeled_ph)), x_unlabeled_ph),
        tf.int32)
    proposal = unlabeled_proposal(x_unlabeled_obs, n_class, z_dim,
                                  n_particles)

    # adapting the proposal
    unlabeled_klpq_obj = zs.variational.klpq(
        model,
        observed={"x": x_unlabeled_obs},
        variational=proposal,
        axis=0)
    unlabeled_q_cost = tf.reduce_mean(unlabeled_klpq_obj.importance())

    # learning model parameters
    unlabeled_lower_bound = tf.reduce_mean(
        zs.variational.importance_weighted_objective(
            model,
            observed={"x": x_unlabeled_obs},
            variational=proposal,
            axis=0))

    # Build classifier
    qy_logits_l = qy_x(x_labeled_obs, n_class)
    qy_l = tf.nn.softmax(qy_logits_l)
    pred_y = tf.argmax(qy_l, 1)
    acc = tf.reduce_sum(
        tf.cast(tf.equal(pred_y, tf.argmax(y_labeled_ph, 1)), tf.float32) /
        tf.cast(tf.shape(x_labeled_obs)[0], tf.float32))
    onehot_cat = zs.distributions.OnehotCategorical(qy_logits_l)
    log_qy_x = onehot_cat.log_prob(y_labeled_ph)
    classifier_cost = -beta * tf.reduce_mean(log_qy_x)

    # Gather gradients
    proposal_cost = labeled_q_cost + unlabeled_q_cost + classifier_cost
    model_cost = -labeled_lower_bound - unlabeled_lower_bound
    optimizer = tf.train.AdamOptimizer(learning_rate=3e-4)
    model_params = tf.trainable_variables(scope="gen")
    model_grads = optimizer.compute_gradients(model_cost,
                                              var_list=model_params)
    proposal_params = (tf.trainable_variables(scope="qy_x") +
                       tf.trainable_variables(scope="qz_xy"))
    proposal_grads = optimizer.compute_gradients(proposal_cost,
                                                 var_list=proposal_params)
    infer_op = optimizer.apply_gradients(model_grads + proposal_grads)

    # Define training/evaluation parameters
    ll_samples = 10
    epochs = 3000
    batch_size = 100
    iters = x_unlabeled.shape[0] // batch_size
    test_freq = 10
    test_batch_size = 100
    test_iters = x_test.shape[0] // test_batch_size

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            np.random.shuffle(x_unlabeled)
            lbs_labeled, lbs_unlabeled, train_accs = [], [], []
            for t in range(iters):
                labeled_indices = np.random.randint(0, n_labeled,
                                                    size=batch_size)
                x_labeled_batch = x_labeled[labeled_indices]
                y_labeled_batch = t_labeled[labeled_indices]
                x_unlabeled_batch = x_unlabeled[t * batch_size:
                                                (t + 1) * batch_size]
                _, lb_labeled, lb_unlabeled, train_acc = sess.run(
                    [infer_op, labeled_lower_bound, unlabeled_lower_bound,
                     acc],
                    feed_dict={x_labeled_ph: x_labeled_batch,
                               y_labeled_ph: y_labeled_batch,
                               x_unlabeled_ph: x_unlabeled_batch,
                               n_particles: ll_samples,
                               n: batch_size})
                lbs_labeled.append(lb_labeled)
                lbs_unlabeled.append(lb_unlabeled)
                train_accs.append(train_acc)
            time_epoch += time.time()
            print('Epoch {} ({:.1f}s), Lower bound: labeled = {}, '
                  'unlabeled = {} Accuracy: {:.2f}%'.format(
                      epoch, time_epoch, np.mean(lbs_labeled),
                      np.mean(lbs_unlabeled), np.mean(train_accs) * 100.))

            if epoch % test_freq == 0:
                time_test = -time.time()
                test_lls_labeled, test_lls_unlabeled, test_accs = [], [], []
                for t in range(test_iters):
                    test_x_batch = x_test[t * test_batch_size:
                                          (t + 1) * test_batch_size]
                    test_y_batch = t_test[t * test_batch_size:
                                          (t + 1) * test_batch_size]
                    # Feed the placeholders, not the binarized tensors;
                    # x_test is already binary, so the dynamic binarization
                    # leaves it unchanged.
                    test_ll_labeled, test_ll_unlabeled, test_acc = sess.run(
                        [labeled_lower_bound, unlabeled_lower_bound, acc],
                        feed_dict={x_labeled_ph: test_x_batch,
                                   y_labeled_ph: test_y_batch,
                                   x_unlabeled_ph: test_x_batch,
                                   n_particles: ll_samples,
                                   n: test_batch_size})
                    test_lls_labeled.append(test_ll_labeled)
                    test_lls_unlabeled.append(test_ll_unlabeled)
                    test_accs.append(test_acc)
                time_test += time.time()
                print('>>> TEST ({:.1f}s)'.format(time_test))
                print('>> Test lower bound: labeled = {}, unlabeled = {}'
                      .format(np.mean(test_lls_labeled),
                              np.mean(test_lls_unlabeled)))
                print('>> Test accuracy: {:.2f}%'.format(
                    100. * np.mean(test_accs)))
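# ---------------------------------------------------------------------------
# Remaining helpers assumed by the meta-Bayesian-net listing, sketched under
# the same caveats as before. Unlike the first listing, this version expects
# the classifier variables under scope "qy_x" and the z-proposal under
# "qz_xy", matching the gradient split above; `zs.reuse_variables` is
# assumed to be available for scoped weight sharing.
# ---------------------------------------------------------------------------

@zs.reuse_variables(scope="qy_x")
def qy_x(x, n_class):
    # Classifier network q(y | x); returns unnormalized logits.
    h = tf.layers.dense(tf.to_float(x), 500, activation=tf.nn.relu)
    h = tf.layers.dense(h, 500, activation=tf.nn.relu)
    return tf.layers.dense(h, n_class)


@zs.reuse_variables(scope="qz_xy")
def qz_xy(x, y, z_dim):
    # Mean and log-std of the diagonal Gaussian proposal q(z | x, y);
    # expects float inputs.
    h = tf.layers.dense(tf.concat([x, y], axis=-1), 500,
                        activation=tf.nn.relu)
    h = tf.layers.dense(h, 500, activation=tf.nn.relu)
    return tf.layers.dense(h, z_dim), tf.layers.dense(h, z_dim)


def labeled_proposal(x, y, z_dim, n_particles):
    # q(z | x, y) for labeled pairs.
    bn = zs.BayesianNet()
    z_mean, z_logstd = qz_xy(tf.to_float(x), tf.to_float(y), z_dim)
    bn.normal("z", z_mean, logstd=z_logstd, n_samples=n_particles,
              group_ndims=1)
    return bn


def unlabeled_proposal(x, n_class, z_dim, n_particles):
    # q(y | x) q(z | x, y): sample a label, then propose z conditioned on
    # it; x is tiled so its shape matches the sampled labels.
    bn = zs.BayesianNet()
    y_logits = qy_x(x, n_class)
    y = bn.onehot_categorical("y", y_logits, n_samples=n_particles)
    x_tiled = tf.tile(tf.expand_dims(tf.to_float(x), 0),
                      [n_particles, 1, 1])
    z_mean, z_logstd = qz_xy(x_tiled, tf.to_float(y), z_dim)
    bn.normal("z", z_mean, logstd=z_logstd, group_ndims=1)
    return bn


if __name__ == "__main__":
    main()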