def L(x_recon, x, y, z):
    # Per-example lower bound for a labelled pair (x, y):
    # log p(y) + log p(z) + log p(x | y, z) - log q(z | x, y).
    # z is passed as [sample, mean, log-variance]; x_recon as [mean, log-variance].
    if self.distributions['p_z'] == 'gaussian_marg':
        log_prior_z = tf.reduce_sum(utils.tf_gaussian_marg(z[1], z[2]), 1)
    elif self.distributions['p_z'] == 'gaussian':
        log_prior_z = tf.reduce_sum(utils.tf_stdnormal_logpdf(z[0]), 1)

    if self.distributions['p_y'] == 'uniform':
        y_prior = (1. / self.dim_y) * tf.ones_like(y)
        log_prior_y = -tf.nn.softmax_cross_entropy_with_logits(
            labels=y_prior, logits=y)

    if self.distributions['p_x'] == 'gaussian':
        log_lik = tf.reduce_sum(
            utils.tf_normal_logpdf(x, x_recon[0], x_recon[1]), 1)

    if self.distributions['q_z'] == 'gaussian_marg':
        log_post_z = tf.reduce_sum(utils.tf_gaussian_ent(z[2]), 1)
    elif self.distributions['q_z'] == 'gaussian':
        log_post_z = tf.reduce_sum(
            utils.tf_normal_logpdf(z[0], z[1], z[2]), 1)

    _L = log_prior_y + log_lik + log_prior_z - log_post_z
    return _L
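# The bound above leans on a handful of Gaussian log-density helpers from
# utils. The NumPy functions below are a minimal sketch of the closed forms
# those helpers are assumed to compute (log-variance parameterisation, as the
# *_lsgms naming suggests); they are illustrative only, not the project's
# actual utils module.
import numpy as np

def np_normal_logpdf(x, mu, log_sigma_sq):
    # log N(x; mu, sigma^2) with sigma^2 = exp(log_sigma_sq), elementwise.
    return (-0.5 * np.log(2 * np.pi) - 0.5 * log_sigma_sq
            - np.square(x - mu) / (2.0 * np.exp(log_sigma_sq)))

def np_stdnormal_logpdf(x):
    # log N(x; 0, 1), elementwise.
    return -0.5 * (np.log(2 * np.pi) + np.square(x))

def np_gaussian_ent(log_sigma_sq):
    # E_q[log q(z)] per dimension for q = N(mu, sigma^2) (depends only on sigma).
    return -0.5 * (np.log(2 * np.pi) + 1.0 + log_sigma_sq)

def np_gaussian_marg(mu, log_sigma_sq):
    # E_q[log p(z)] per dimension for p = N(0, 1) and q = N(mu, sigma^2).
    return -0.5 * (np.log(2 * np.pi) + np.square(mu) + np.exp(log_sigma_sq))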
def _objective(self):

    ############
    ''' Cost '''
    ############

    self.z_sample, self.z_mu, self.z_lsgms = self._generate_zx(self.x)
    self.x_recon, self.x_recon_logits = self._generate_xz(self.z_sample)

    if self.distributions['p_z'] == 'gaussian_marg':
        prior_z = tf.reduce_sum(
            utils.tf_gaussian_marg(self.z_mu, self.z_lsgms), 1)

    if self.distributions['q_z'] == 'gaussian_marg':
        post_z = tf.reduce_sum(utils.tf_gaussian_ent(self.z_lsgms), 1)

    if self.distributions['p_x'] == 'bernoulli':
        self.log_lik = -tf.reduce_sum(
            utils.tf_binary_xentropy(self.x, self.x_recon), 1)

    l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    self.cost = tf.reduce_mean(post_z - prior_z - self.log_lik) \
        + self.l2_loss * l2

    ##################
    ''' Evaluation '''
    ##################

    self.z_sample_eval, _, _ = self._generate_zx(
        self.x, phase=pt.Phase.test, reuse=True)
    self.x_recon_eval, _ = self._generate_xz(
        self.z_sample_eval, phase=pt.Phase.test, reuse=True)

    self.eval_log_lik = -tf.reduce_mean(
        tf.reduce_sum(
            utils.tf_binary_xentropy(self.x, self.x_recon_eval), 1))
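# Here utils.tf_binary_xentropy(x, x_recon) is assumed to be the elementwise
# binary cross-entropy, so the negated sum above is the Bernoulli
# log-likelihood log p(x | z). A minimal NumPy sketch of that assumption
# (the function name is illustrative, not part of the original code):
import numpy as np

def np_binary_xentropy(x, x_recon, eps=1e-8):
    # Elementwise -[x * log(x_hat) + (1 - x) * log(1 - x_hat)], clipped for stability.
    x_recon = np.clip(x_recon, eps, 1.0 - eps)
    return -(x * np.log(x_recon) + (1.0 - x) * np.log(1.0 - x_recon))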
def _objective(self):

    ############
    ''' Cost '''
    ############

    self.z_sample, self.z_mu, self.z_lsgms = self._generate_zx(self.x)
    self.x_hat = self._generate_xz(self.z_sample)
    # Re-encode the reconstruction to obtain z_tau for the correlation term.
    self.z_tau, _, _ = self._generate_zx(self.x_hat, reuse=True)

    if self.distributions['p_z'] == 'gaussian_marg':
        prior_z = tf.reduce_sum(
            utils.tf_gaussian_marg(self.z_mu, self.z_lsgms), 1)

    if self.distributions['q_z'] == 'gaussian_marg':
        post_z = tf.reduce_sum(utils.tf_gaussian_ent(self.z_lsgms), 1)

    if self.distributions['p_x'] == 'bernoulli':
        self.log_lik = -tf.reduce_sum(
            utils.tf_binary_xentropy(self.x, self.x_hat), 1)

    l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])

    # Closed-form KL( q(z|x) || N(0, I) ) for a diagonal Gaussian posterior.
    latent_cost = -0.5 * tf.reduce_sum(
        1 + self.z_lsgms - tf.square(self.z_mu) - tf.exp(self.z_lsgms),
        axis=1)
    latent_loss = tf.reduce_mean(latent_cost)

    # Correlation between the latent code and its re-encoding (cycle term).
    z_mean, z_var = tf.nn.moments(self.z_sample, axes=[0], keep_dims=True)
    z_tau_mean, z_tau_var = tf.nn.moments(self.z_tau, axes=[0], keep_dims=True)
    num = tf.reduce_mean(
        tf.multiply(tf.transpose(self.z_sample - z_mean),
                    (self.z_tau - z_tau_mean)),
        axis=[0, 1])
    den = tf.reduce_mean(tf.multiply(z_var, tf.transpose(z_tau_var)))
    self.corr_loss = -num / (den + 1e-6)

    self.mse_loss = tf.losses.mean_squared_error(
        labels=self.y, predictions=self.x_hat)

    # self.cost = tf.reduce_mean(post_z - prior_z) + self.corr_loss \
    #     + self.mse_loss + self.l2_loss * l2
    self.cost = self.mse_loss + latent_loss

    ##################
    ''' Evaluation '''
    ##################

    self.z_sample_eval, _, _ = self._generate_zx(self.x, reuse=True)
    self.x_hat_eval = self._generate_xz(self.z_sample_eval, reuse=True)

    self.eval_log_lik = -tf.reduce_mean(
        tf.reduce_sum(
            utils.tf_binary_xentropy(self.x, self.x_hat_eval), 1))
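# latent_cost above is the standard closed-form KL divergence for a diagonal
# Gaussian posterior against a standard normal prior:
#     KL( N(mu, sigma^2) || N(0, I) ) = -0.5 * sum(1 + log sigma^2 - mu^2 - sigma^2)
# A quick NumPy check (under the helper semantics sketched earlier) that this
# matches the post_z - prior_z formulation used in the previous objective:
import numpy as np

mu = np.random.randn(4, 8)
lsgms = np.random.randn(4, 8)   # log sigma^2

kl_closed_form = -0.5 * np.sum(
    1 + lsgms - np.square(mu) - np.exp(lsgms), axis=1)

post_z = np.sum(-0.5 * (np.log(2 * np.pi) + 1.0 + lsgms), axis=1)
prior_z = np.sum(-0.5 * (np.log(2 * np.pi) + np.square(mu) + np.exp(lsgms)), axis=1)

assert np.allclose(kl_closed_form, post_z - prior_z)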
def L(x_recon, x, y, z):
    # Labelled lower bound, as above:
    # log p(y) + log p(z) + log p(x | y, z) - log q(z | x, y).
    if self.distributions['p_z'] == 'gaussian_marg':
        log_prior_z = tf.reduce_sum(utils.tf_gaussian_marg(z[1], z[2]), 1)
    elif self.distributions['p_z'] == 'gaussian':
        log_prior_z = tf.reduce_sum(utils.tf_stdnormal_logpdf(z[0]), 1)

    if self.distributions['p_y'] == 'uniform':
        y_prior = (1. / self.dim_y) * tf.ones_like(y)
        log_prior_y = -tf.nn.softmax_cross_entropy_with_logits(
            labels=y_prior, logits=y)

    if self.distributions['p_x'] == 'gaussian':
        log_lik = tf.reduce_sum(
            utils.tf_normal_logpdf(x, x_recon[0], x_recon[1]), 1)

    if self.distributions['q_z'] == 'gaussian_marg':
        log_post_z = tf.reduce_sum(utils.tf_gaussian_ent(z[2]), 1)
    elif self.distributions['q_z'] == 'gaussian':
        log_post_z = tf.reduce_sum(
            utils.tf_normal_logpdf(z[0], z[1], z[2]), 1)

    _L = log_prior_y + log_lik + log_prior_z - log_post_z
    return _L

###########################
''' Labelled Datapoints '''
###########################

self.y_lab_logits, self.x_lab = self._generate_yx(
    self.x_labelled_mu, self.x_labelled_lsgms)
self.z_lab, self.z_lab_mu, self.z_lab_lsgms = self._generate_zxy(
    self.x_lab, self.y_lab)
self.x_recon_lab_mu, self.x_recon_lab_lsgms = self._generate_xzy(
    self.z_lab, self.y_lab)

L_lab = L([self.x_recon_lab_mu, self.x_recon_lab_lsgms],
          self.x_lab, self.y_lab,
          [self.z_lab, self.z_lab_mu, self.z_lab_lsgms])

# Classification loss on the labelled batch, weighted by beta.
L_lab += -self.beta * tf.nn.softmax_cross_entropy_with_logits(
    labels=self.y_lab, logits=self.y_lab_logits)

#############################
''' Unlabelled Datapoints '''
#############################

def one_label_tensor(label):
    # Constant one-hot batch assigning every unlabelled example the given label.
    indices = []
    values = []
    for i in range(self.num_ulab_batch):
        indices += [[i, label]]
        values += [1.]
    _y_ulab = tf.sparse_tensor_to_dense(
        tf.SparseTensor(indices=indices, values=values,
                        dense_shape=[self.num_ulab_batch, self.dim_y]),
        0.0)
    return _y_ulab

self.y_ulab_logits, self.x_ulab = self._generate_yx(
    self.x_unlabelled_mu, self.x_unlabelled_lsgms, reuse=True)

# Evaluate the labelled bound once per candidate label and stack column-wise.
for label in range(self.dim_y):
    _y_ulab = one_label_tensor(label)
    self.z_ulab, self.z_ulab_mu, self.z_ulab_lsgms = self._generate_zxy(
        self.x_ulab, _y_ulab, reuse=True)
    self.x_recon_ulab_mu, self.x_recon_ulab_lsgms = self._generate_xzy(
        self.z_ulab, _y_ulab, reuse=True)
    _L_ulab = tf.expand_dims(
        L([self.x_recon_ulab_mu, self.x_recon_ulab_lsgms],
          self.x_ulab, _y_ulab,
          [self.z_ulab, self.z_ulab_mu, self.z_ulab_lsgms]), 1)

    if label == 0:
        L_ulab = tf.identity(_L_ulab)
    else:
        L_ulab = tf.concat([L_ulab, _L_ulab], 1)

self.y_ulab = self.y_ulab_logits.softmax_activation()

# Marginalise the labelled bound over q(y|x), adding its entropy.
U = tf.reduce_sum(
    tf.multiply(self.y_ulab, tf.subtract(L_ulab, tf.log(self.y_ulab))), 1)

########################
''' Prior on Weights '''
########################

L_weights = 0.
_weights = tf.trainable_variables()
for w in _weights:
    L_weights += tf.reduce_sum(utils.tf_stdnormal_logpdf(w))

##################
''' Total Cost '''
##################

L_lab_tot = tf.reduce_sum(L_lab)
U_tot = tf.reduce_sum(U)

self.cost = ((L_lab_tot + U_tot) * self.num_batches + L_weights) / (
    -self.num_batches * self.batch_size)

##################
''' Evaluation '''
##################

self.y_test_logits, _ = self._generate_yx(
    self.x_labelled_mu, self.x_labelled_lsgms,
    phase=pt.Phase.test, reuse=True)
self.y_test_pred = self.y_test_logits.softmax(self.y_lab)

self.eval_accuracy = self.y_test_pred \
    .softmax.evaluate_classifier(self.y_lab, phase=pt.Phase.test)
self.eval_cross_entropy = self.y_test_pred.loss
self.eval_precision, self.eval_recall = self.y_test_pred.softmax \
    .evaluate_precision_recall(self.y_lab, phase=pt.Phase.test)


def train(self, x_labelled, y, x_unlabelled, epochs, x_valid, y_valid,
          print_every=1, learning_rate=3e-4, beta1=0.9, beta2=0.999,
          seed=31415, stop_iter=100, save_path=None, load_path=None):

    ''' Session and Summary '''
    if save_path is None:
        self.save_path = 'checkpoints/model_GC_{}-{}-{}_{}.cpkt'.format(
            self.num_lab, learning_rate, self.batch_size, time.time())
    else:
        self.save_path = save_path

    np.random.seed(seed)
    tf.set_random_seed(seed)

    with self.G.as_default():
        self.optimiser = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=beta1, beta2=beta2)
        self.train_op = self.optimiser.minimize(self.cost)
        init = tf.global_variables_initializer()
        self._test_vars = None

    _data_labelled = np.hstack([x_labelled, y])
    _data_unlabelled = x_unlabelled
    x_valid_mu, x_valid_lsgms = (x_valid[:, :self.dim_x],
                                 x_valid[:, self.dim_x:2 * self.dim_x])

    with self.session as sess:

        sess.run(init)
        if load_path == 'default':
            self.saver.restore(sess, self.save_path)
        elif load_path is not None:
            self.saver.restore(sess, load_path)

        best_eval_accuracy = 0.
        stop_counter = 0

        for epoch in range(epochs):

            ''' Shuffle Data '''
            np.random.shuffle(_data_labelled)
            np.random.shuffle(_data_unlabelled)

            ''' Training '''
            for x_l_mu, x_l_lsgms, y, x_u_mu, x_u_lsgms in \
                    utils.feed_numpy_semisupervised(
                        self.num_lab_batch, self.num_ulab_batch,
                        _data_labelled[:, :2 * self.dim_x],
                        _data_labelled[:, 2 * self.dim_x:],
                        _data_unlabelled):
                training_result = sess.run(
                    [self.train_op, self.cost],
                    feed_dict={
                        self.x_labelled_mu: x_l_mu,
                        self.x_labelled_lsgms: x_l_lsgms,
                        self.y_lab: y,
                        self.x_unlabelled_mu: x_u_mu,
                        self.x_unlabelled_lsgms: x_u_lsgms
                    })

                training_cost = training_result[1]

            ''' Evaluation '''
            stop_counter += 1

            if epoch % print_every == 0:

                test_vars = tf.get_collection(
                    bookkeeper.GraphKeys.TEST_VARIABLES)
                if test_vars:
                    if test_vars != self._test_vars:
                        self._test_vars = list(test_vars)
                        self._test_var_init_op = tf.variables_initializer(
                            test_vars)
                    self._test_var_init_op.run()

                eval_accuracy, eval_cross_entropy = sess.run(
                    [self.eval_accuracy, self.eval_cross_entropy],
                    feed_dict={
                        self.x_labelled_mu: x_valid_mu,
                        self.x_labelled_lsgms: x_valid_lsgms,
                        self.y_lab: y_valid
                    })

                if eval_accuracy > best_eval_accuracy:
                    best_eval_accuracy = eval_accuracy
                    self.saver.save(sess, self.save_path)
                    stop_counter = 0

                utils.print_metrics(
                    epoch + 1,
                    ['Training', 'cost', training_cost],
                    ['Validation', 'accuracy', eval_accuracy],
                    ['Validation', 'cross-entropy', eval_cross_entropy])

            if stop_counter >= stop_iter:
                print('Stopping GC training')
                print('No change in validation accuracy for {} iterations'
                      .format(stop_iter))
                print('Best validation accuracy: {}'.format(
                    best_eval_accuracy))
                print('Model saved in {}'.format(self.save_path))
                break
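# For unlabelled data, the objective above marginalises the labelled bound
# over the classifier's predictive distribution (Kingma et al., 2014):
#     U(x) = sum_y q(y|x) * ( L(x, y) - log q(y|x) )
# which is what tf.multiply(self.y_ulab, tf.subtract(L_ulab, tf.log(self.y_ulab)))
# computes, with L_ulab holding L(x, y) for every candidate label.
# A minimal NumPy sketch of that reduction (values and shapes are illustrative):
import numpy as np

q_y = np.array([[0.7, 0.2, 0.1],
                [0.1, 0.6, 0.3]])        # q(y|x): one row per unlabelled example
L_xy = np.array([[-90., -95., -99.],
                 [-80., -78., -85.]])    # labelled bound L(x, y) per candidate label

U = np.sum(q_y * (L_xy - np.log(q_y)), axis=1)   # one scalar bound per example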