def loss(self, x, y):
    with tf.name_scope('loss'):
        z_mu, z_lv = self._encode(x)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._generate(z, y)

        # KL divergence between the approximate posterior q(z|x) and N(0, I)
        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        # Reconstruction log-likelihood under a unit-variance Gaussian
        logPx = tf.reduce_mean(
            GaussianLogDensity(
                slim.flatten(x),
                slim.flatten(xh),
                tf.zeros_like(slim.flatten(xh))),
        )

        loss = dict()
        loss['G'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
    return loss
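Every variant in this section relies on the same three Gaussian helpers (GaussianSampleLayer, GaussianKLD, GaussianLogDensity), which are not defined here. The following is a minimal sketch of what they are assumed to compute: the reparameterization trick, and the closed-form KL divergence and log-density of diagonal Gaussians parameterized by mean and log-variance. The EPSILON constant and the reduction over the last axis are illustrative assumptions, not taken from the repository.

import math
import tensorflow as tf

EPSILON = 1e-6  # hypothetical numerical-stability constant


def GaussianSampleLayer(z_mu, z_lv, name='GaussianSampleLayer'):
    ''' Reparameterization trick: z = mu + exp(lv / 2) * eps, eps ~ N(0, I). '''
    with tf.name_scope(name):
        eps = tf.random_normal(tf.shape(z_mu))
        std = tf.exp(0.5 * z_lv)
        return z_mu + eps * std


def GaussianLogDensity(x, mu, log_var, name='GaussianLogDensity'):
    ''' log N(x; mu, exp(log_var)) for a diagonal Gaussian,
        summed over the last (feature) axis. '''
    with tf.name_scope(name):
        c = math.log(2. * math.pi)
        var = tf.exp(log_var)
        x_mu2_over_var = tf.square(x - mu) / (var + EPSILON)
        log_prob = -0.5 * (c + log_var + x_mu2_over_var)
        return tf.reduce_sum(log_prob, -1)


def GaussianKLD(mu1, lv1, mu2, lv2, name='GaussianKLD'):
    ''' KL( N(mu1, exp(lv1)) || N(mu2, exp(lv2)) ) for diagonal Gaussians,
        summed over the last (feature) axis. '''
    with tf.name_scope(name):
        v1 = tf.exp(lv1)
        v2 = tf.exp(lv2)
        mu_diff_sq = tf.square(mu2 - mu1)
        dimwise_kld = 0.5 * (
            (lv2 - lv1) + (v1 + mu_diff_sq) / (v2 + EPSILON) - 1.)
        return tf.reduce_sum(dimwise_kld, -1)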
def loss(self, x, y):
    '''
    Args:
        x: shape=[s, b, c]
        y: shape=[s, b]
    Returns:
        a `dict` of losses
    '''
    z_mu, z_lv = self._encode(x, is_training=self.is_training)
    z = GaussianSampleLayer(z_mu, z_lv)
    xh = self._decode(z, y, is_training=self.is_training)

    with tf.name_scope('loss'):
        with tf.name_scope('E_log_p_x_zy'):
            L_x = -1.0 * tf.reduce_mean(
                GaussianLogDensity(x, xh, tf.zeros_like(x)),
            )
        with tf.name_scope('D_KL_z'):
            L_z = tf.reduce_mean(
                GaussianKLD(
                    z_mu, z_lv,
                    tf.zeros_like(z_mu), tf.zeros_like(z_lv)))

        loss = {
            'L_x': L_x,
            'L_z': L_z,
        }

    tf.summary.scalar('L_x', L_x)
    tf.summary.scalar('L_z', L_z)
    return loss
def loss(self, x, y):
    with tf.name_scope('loss'):
        z_mu, z_lv = self._encode(x)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._generate(z, y)

        # VAE terms
        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        logPx = tf.reduce_mean(
            GaussianLogDensity(
                slim.flatten(x),
                slim.flatten(xh),
                tf.zeros_like(slim.flatten(xh))),
        )

        # WGAN critic outputs on real and reconstructed samples
        dx = self._discriminate(x)
        dxh = self._discriminate(xh)
        W_dist = tf.reduce_mean(dx - dxh)
        g_loss = tf.reduce_mean(-dxh)

        # Gradient penalty (WGAN-GP): penalize the critic's gradient norm
        # on random interpolations between real and generated samples.
        batch_size = self.arch['training']['batch_size']
        lam = self.arch['training']['lambda']
        alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
        alpha = alpha_dist.sample((batch_size, 1, 1, 1))
        interpolated = x + alpha * (xh - x)
        inte_logit = self._discriminate(interpolated)
        gradients = tf.gradients(inte_logit, [interpolated])[0]
        grad_l2 = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((grad_l2 - 1) ** 2)
        gp = lam * gradient_penalty

        loss = dict()
        # NOTE: this `alpha` is the GAN-loss weight from the config,
        # not the interpolation coefficient sampled above.
        alpha = self.arch['training']['alpha']
        loss['l_E'] = -logPx + D_KL
        loss['D_KL'] = D_KL
        loss['logP'] = logPx
        loss['l_D'] = -W_dist + gp
        loss['l_G'] = -logPx + alpha * g_loss
        loss['W_dist'] = W_dist
        loss['gp'] = gp

        tf.summary.scalar('KL-div', D_KL)
        tf.summary.scalar('logPx', logPx)
        tf.summary.scalar('W_dist', W_dist)
        tf.summary.scalar("gp_loss", gradient_penalty)

        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
    return loss
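The keys 'l_E', 'l_D', and 'l_G' returned above suggest that the encoder, critic, and generator are updated with separate optimizers. Below is a hypothetical training-step sketch under that assumption; the scope-name filtering (the names mirror the variable scopes used in the later variants), the Adam hyper-parameters, and the n_critic schedule are illustrative choices, not taken from the repository.

import tensorflow as tf


def build_train_ops(loss, lr=1e-4, beta1=0.5, beta2=0.9):
    # Split trainable variables by (assumed) variable-scope name.
    trainables = tf.trainable_variables()
    e_vars = [v for v in trainables if 'encoder' in v.name]
    g_vars = [v for v in trainables if 'generator' in v.name]
    d_vars = [v for v in trainables if 'discriminator' in v.name]
    return dict(
        e=tf.train.AdamOptimizer(lr, beta1, beta2).minimize(
            loss['l_E'], var_list=e_vars),
        g=tf.train.AdamOptimizer(lr, beta1, beta2).minimize(
            loss['l_G'], var_list=g_vars),
        d=tf.train.AdamOptimizer(lr, beta1, beta2).minimize(
            loss['l_D'], var_list=d_vars),
    )


def train_step(sess, train_ops, n_critic=5):
    # WGAN-GP convention: several critic updates per generator/encoder update.
    # Assumes the input pipeline feeds x and y internally (no feed_dict).
    for _ in range(n_critic):
        sess.run(train_ops['d'])
    sess.run([train_ops['e'], train_ops['g']])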
def loss(self, x_s, y_s, x_t, y_t):

    def circuit_loop(x, y):
        # One pass through encoder, generator, and discriminator,
        # plus a re-encoding of the reconstruction.
        z_mu, z_lv = self._encode(x, is_training=self.is_training)
        z = GaussianSampleLayer(z_mu, z_lv)
        x_logit, x_feature = self._discriminate(
            x, is_training=self.is_training)

        xh, xh_sig_logit = self._generate(z, y, is_training=self.is_training)

        zh_mu, zh_lv = self._encode(xh, is_training=self.is_training)
        xh_logit, xh_feature = self._discriminate(
            xh, is_training=self.is_training)

        return dict(
            z=z,
            z_mu=z_mu,
            z_lv=z_lv,
            xh=xh,
            xh_sig_logit=xh_sig_logit,
            x_logit=x_logit,
            x_feature=x_feature,
            zh_mu=zh_mu,
            zh_lv=zh_lv,
            xh_logit=xh_logit,
            xh_feature=xh_feature,
        )

    s = circuit_loop(x_s, y_s)     # source reconstructed as source
    t = circuit_loop(x_t, y_t)     # target reconstructed as target
    s2t = circuit_loop(x_s, y_t)   # source converted to target

    with tf.name_scope('loss'):

        def mean_sigmoid_cross_entropy_with_logits(logit, truth):
            ''' truth: 0. or 1. '''
            return tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=logit,
                    labels=truth * tf.ones_like(logit)))

        loss = dict()

        # Parallel: Wasserstein estimate between target and its reconstruction
        loss['reconst_t'] = \
            tf.reduce_mean(t['x_logit']) \
            - tf.reduce_mean(t['xh_logit'])

        # Parallel: Wasserstein estimate between source and its reconstruction
        loss['reconst_s'] = \
            tf.reduce_mean(s['x_logit']) \
            - tf.reduce_mean(s['xh_logit'])

        # Non-parallel: Wasserstein estimate between target and converted source
        loss['conv_s2t'] = \
            tf.reduce_mean(t['x_logit']) \
            - tf.reduce_mean(s2t['xh_logit'])

        # Non-parallel: real source vs. real target
        loss['real_s_t'] = \
            tf.reduce_mean(t['x_logit']) \
            - tf.reduce_mean(s['x_logit'])

        # Only the conversion term enters the adversarial (WGAN) objective.
        loss['WGAN'] = loss['conv_s2t']

        # VAE's Kullback-Leibler divergence
        loss['KL(z)'] = \
            tf.reduce_mean(
                GaussianKLD(
                    s['z_mu'], s['z_lv'],
                    tf.zeros_like(s['z_mu']), tf.zeros_like(s['z_lv']))) +\
            tf.reduce_mean(
                GaussianKLD(
                    t['z_mu'], t['z_lv'],
                    tf.zeros_like(t['z_mu']), tf.zeros_like(t['z_lv'])))
        loss['KL(z)'] /= 2.0

        # VAE's reconstruction negative log-likelihood
        # (computed directly on x, not on the discriminator's feature space)
        loss['Dis'] = \
            tf.reduce_mean(
                GaussianLogDensity(
                    slim.flatten(x_t),
                    slim.flatten(t['xh']),
                    tf.zeros_like(slim.flatten(x_t)))) +\
            tf.reduce_mean(
                GaussianLogDensity(
                    slim.flatten(x_s),
                    slim.flatten(s['xh']),
                    tf.zeros_like(slim.flatten(x_s))))
        loss['Dis'] /= -2.0

        # For summaries
        with tf.name_scope('Summary'):
            tf.summary.scalar('DKL_z', loss['KL(z)'])
            tf.summary.scalar('MMSE', loss['Dis'])
            tf.summary.scalar('WGAN', loss['WGAN'])
            tf.summary.scalar('WGAN-s', loss['reconst_s'])
            tf.summary.scalar('WGAN-t', loss['reconst_t'])
            tf.summary.scalar('WGAN-s2t', loss['conv_s2t'])
            tf.summary.scalar('WGAN-t-s', loss['real_s_t'])
            tf.summary.histogram('y', tf.concat([y_t, y_s], 0))
            tf.summary.histogram('z', tf.concat([s['z'], t['z']], 0))
            tf.summary.histogram('z_s', s['z'])
            tf.summary.histogram('z_t', t['z'])
            tf.summary.histogram('z_mu', tf.concat([s['z_mu'], t['z_mu']], 0))
            tf.summary.histogram('z_mu_s', s['z_mu'])
            tf.summary.histogram('z_mu_t', t['z_mu'])
            tf.summary.histogram('z_lv', tf.concat([s['z_lv'], t['z_lv']], 0))
            tf.summary.histogram('z_lv_s', s['z_lv'])
            tf.summary.histogram('z_lv_t', t['z_lv'])
            tf.summary.histogram('logit_t_from_t', t['xh_logit'])
            tf.summary.histogram('logit_t_from_s', s2t['xh_logit'])
            tf.summary.histogram('logit_t', t['x_logit'])
            tf.summary.histogram(
                'logit_t_True_FromT_FromS',
                tf.concat([t['x_logit'], t['xh_logit'], s2t['xh_logit']], 0))
            tf.summary.histogram(
                'logit_s_v_sh',
                tf.concat([s['x_logit'], s['xh_logit']], 0))
            tf.summary.histogram(
                'logit_t_v_th',
                tf.concat([t['x_logit'], t['xh_logit']], 0))
    return loss
def loss(self, x, y):
    with tf.name_scope('loss'):
        # Specify variable scopes so that trainable variables can be
        # collected per sub-network later.
        with tf.variable_scope("encoder") as scope:
            z_mu, z_lv = self.encoder(x, self.is_training)
            z = GaussianSampleLayer(z_mu, z_lv)
        with tf.variable_scope("generator") as scope:
            xh = self.generator(z, y, self.is_training)
            print("xh shape:", xh.get_shape().as_list())
            #xh = self.nchw_to_nhwc(xh)
            print("xh shape:", xh.get_shape().as_list())
        with tf.variable_scope("discriminator") as scope:
            #x = nchw_to_nhwc(x)
            disc_real, x_through_d = self.discriminator(x, self.is_training)
            print("disc_real shape:", disc_real.get_shape().as_list())
            print("x_through_d:", x_through_d.get_shape().as_list())
            disc_fake, xh_through_d = self.discriminator(xh, self.is_training)

        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                slim.flatten(tf.zeros_like(z_mu)),
                slim.flatten(tf.zeros_like(z_lv)),
            ))
        # Reconstruction term measured in the discriminator's feature space
        logPx = -tf.reduce_mean(
            GaussianLogDensity(
                x_through_d,
                xh_through_d,
                tf.zeros_like(xh_through_d)),
        )

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        batch_size = self.arch['training']['batch_size']
        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        gen_loss = -tf.reduce_mean(disc_fake)
        disc_loss = tf.reduce_mean(disc_fake - disc_real)

        # Gradient penalty (WGAN-GP) on random interpolations between x and xh
        alpha = tf.random_uniform(shape=[batch_size, 513, 1, 1],
                                  minval=0., maxval=1.)
        self.reuse = False
        print("before gradient x shape:", x.get_shape().as_list())
        differences = xh - x
        interpolates = x + (alpha * differences)
        print("interpolates shape:", interpolates.get_shape().as_list())
        pred, inter_h = self.discriminator(interpolates, self.is_training)
        print("pred shape:", pred.get_shape().as_list())
        gradients = tf.gradients(pred, [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), axis=[1, 2]))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
        disc_loss += self.arch['LAMBDA'] * gradient_penalty
        self.reuse = True

        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss

        loss['xh'] = xh
        loss['l_G'] = g_loss
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = D_KL + logPx + 50. * d_loss
    return loss
def loss(self, x, y, is_training=True):
    batch_size = self.arch['training']['batch_size']
    # Interpolation coefficients for the gradient penalty
    alpha = tf.random_uniform(shape=[batch_size, 1, 1, 1],
                              minval=0., maxval=1.)

    with tf.name_scope('loss'):
        # Specify variable scopes so that trainable variables can be
        # collected per sub-network later.
        with tf.variable_scope("encoder") as scope:
            z_mu, z_lv = self.encoder(x, is_training)
            z = GaussianSampleLayer(z_mu, z_lv)

        # KL divergence; note that the reference mean and log-variance are
        # drawn from tf.random_normal here instead of the usual zeros
        # (the zero-prior version is kept in the comments).
        D_KL = tf.reduce_mean(
            GaussianKLD(
                slim.flatten(z_mu),
                slim.flatten(z_lv),
                #slim.flatten(tf.zeros_like(z_mu)),
                #slim.flatten(tf.zeros_like(z_lv)),
                slim.flatten(tf.random_normal(tf.shape(z_mu))),
                slim.flatten(tf.random_normal(tf.shape(z_lv))),
            ))

        with tf.variable_scope("generator") as scope:
            xh = self.generator(z, y, is_training)
            print("xh shape:", xh.get_shape().as_list())
            xh = self.nchw_to_nhwc(xh)
            print("xh shape:", xh.get_shape().as_list())

        with tf.variable_scope("discriminator") as scope:
            disc_real, x_through_d = self.discriminator(x, is_training)
            print("disc_real shape:", disc_real.get_shape().as_list())
            print("x_through_d:", x_through_d.get_shape().as_list())
            disc_fake, xh_through_d = self.discriminator(xh, is_training,
                                                         reuse=True)
            # Reconstruction term measured in the discriminator's feature space
            logPx = -tf.reduce_mean(
                GaussianLogDensity(
                    x_through_d,
                    xh_through_d,
                    tf.zeros_like(xh_through_d),
                ))

            # Gradient penalty (WGAN-GP)
            print("before gradient x shape:", x.get_shape().as_list())
            differences = xh - x
            differences = self.nhwc_to_nchw(differences)
            print("differences shape:", differences.get_shape().as_list())
            interpolates = self.nhwc_to_nchw(x) + (alpha * differences)
            print("interpolates shape:", interpolates.get_shape().as_list())
            interpolates = tf.transpose(interpolates, [0, 2, 1, 3])
            pred, inter_h = self.discriminator(interpolates, is_training,
                                               reuse=True)
            print("pred shape:", pred.get_shape().as_list())
            gradients = tf.gradients(pred, [interpolates])[0]
            slopes = tf.sqrt(
                tf.reduce_sum(tf.square(gradients), axis=[1]))
            gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
            gradient_penalty = self.arch['LAMBDA'] * gradient_penalty
            tf.summary.histogram("gradient_penalty", gradient_penalty)

        loss = dict()
        loss['D_KL'] = D_KL
        loss['logP'] = logPx

        #disc_real_loss = tf.losses.sigmoid_cross_entropy(disc_real, tf.ones([batch_size, 1]))
        #disc_fake_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.fill([batch_size, 1], -1.0))
        #disc_real_loss = -tf.reduce_mean(disc_real)
        #disc_fake_loss = -tf.reduce_mean(disc_fake)
        gen_loss = tf.reduce_mean(-disc_fake)
        # WGAN critic loss: mean(fake) - mean(real), following the convention
        # used in the previous variant.
        disc_loss = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)

        #d_loss = disc_real_loss + disc_fake_loss
        #g_loss = tf.losses.sigmoid_cross_entropy(disc_fake, tf.ones([batch_size, 1]))
        g_loss = gen_loss
        d_loss = disc_loss + gradient_penalty

        tf.summary.histogram("D_KL", D_KL)
        tf.summary.histogram("logpx", logPx)
        tf.summary.histogram('xh', xh)
        tf.summary.histogram('x', x)
        tf.summary.histogram('gen_loss', gen_loss)
        tf.summary.histogram('disc_loss', disc_loss)
        tf.summary.histogram("gradient penalty", gradient_penalty)
        tf.summary.histogram("Discriminator_loss", d_loss)
        tf.summary.histogram("Generator_loss", g_loss)

        loss['l_G'] = g_loss + logPx
        loss['l_D'] = d_loss
        loss['l_E'] = D_KL + logPx
        loss['G'] = (D_KL + logPx) + 50. * g_loss + loss['l_D']
    return loss
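The last two variants reference self.nchw_to_nhwc and self.nhwc_to_nchw without showing them. A minimal sketch, assuming they are plain layout transposes between NCHW and NHWC, is:

def nchw_to_nhwc(self, x):
    # [batch, channel, height, width] -> [batch, height, width, channel]
    return tf.transpose(x, [0, 2, 3, 1])


def nhwc_to_nchw(self, x):
    # [batch, height, width, channel] -> [batch, channel, height, width]
    return tf.transpose(x, [0, 3, 1, 2])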