def sg_mse(tensor, opt): r"""Returns squared error between `tensor` and `target`. Args: tensor: A `Tensor`. target: A `Tensor` with the same shape and dtype as `tensor`. Returns: A `Tensor` of the same shape and dtype as `tensor` For example, ``` tensor = [[34, 11, 40], [13, 30, 42]] target = [[34, 10, 41], [14, 31, 40]] tensor.sg_mse(target=target) => [[ 0. 1. 1.] [ 1. 1. 4.]] ``` """ assert opt.target is not None, 'target is mandatory.' # squared error out = tf.identity(tf.square(tensor - opt.target), 'mse') # add summary tf.sg_summary_loss(out) return out
def penalize_loss(gamma, lambd, tensor, tensor_n):
    # gamma * (vector - vector_n)**2
    #   - lambd * (vector . vector_n) / (norm(vector) * norm(vector_n))
    with tf.sg_context(name='penalize'):
        # squared difference, summed over the feature (2) and time (1) axes
        square = tf.reduce_sum(tf.reduce_sum(tf.square(tensor - tensor_n), 2), 1)
        # cosine similarity, summed over the same axes
        cosine = tf.reduce_sum(
            tf.reduce_sum(
                tf.multiply(tf.nn.l2_normalize(tensor, 2),
                            tf.nn.l2_normalize(tensor_n, 2)), 2), 1)
        return gamma * square - lambd * cosine
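# A hedged usage sketch for `penalize_loss`. The 3-D (batch, time, dim) shape
# is an assumption inferred from the axis-2/axis-1 reductions above, and the
# gamma/lambd values are illustrative only.
t = tf.random_normal((8, 10, 64))    # original sequence features
t_n = tf.random_normal((8, 10, 64))  # perturbed counterpart
penalty = penalize_loss(gamma=0.1, lambd=0.01, tensor=t, tensor_n=t_n)
# `penalty` has shape (8,): one scalar penalty per batch element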
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: A `Tensor` (automatically passed by the decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not specified.
        in_dim: An integer. The size of the input dimension, which is set to
          the last one by default.
        dim: An integer. The size of the output dimension. Has the same value
          as `in_dim` by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float of range [0, 1). A dropout rate. Set to 0 by default.
        bias: Boolean. If True, biases are added. Set to True by default.
        name: A name for the layer. By default, the function name is assigned.
        act: A name of an activation function, e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this
          `layer` scope as well as all sub-scopes; if `None`, we just inherit
          the parent scope reuse.
        regularizer: A string: None, 'l1' or 'l2'. The default is None.
        summary: If True, summaries are added. The default is True.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing ( merged with the ambient sg_context )
    opt = tf.sg_opt(kwargs) + sg_get_context()

    # set default arguments
    shape = tensor.get_shape().as_list()
    # batch normalization off, layer normalization off, dropout off
    opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                     bn=False, ln=False, dout=0, summary=True)
    if opt.regularizer == 'l1':
        opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
    elif opt.regularizer == 'l2':
        opt.regularizer = lambda x: tf.square(tf.reduce_mean(tf.square(x)))
    else:
        opt.regularizer = None

    assert not (opt.bn and opt.ln), \
        'only one of batch normalization and layer normalization is allowed.'

    # disable bias when normalization is on
    opt += tf.sg_opt(bias=not (opt.bn or opt.ln))

    # automatic layer naming
    if opt.name is None:
        # the layer function name will be used as the layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (
                max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameters
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # calculate batch mean, variance
            mean, variance = tf.nn.moments(
                out, axes=list(range(len(out.get_shape()) - 1)))

            # running mean, variance ( for inference )
            mean_running = init.constant('mean', opt.dim, trainable=False)
            variance_running = init.constant('variance', opt.dim, value=1,
                                             trainable=False)

            # add running mean, variance updates to the UPDATE_OPS collection
            decay = 0.99
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                mean_running.assign(mean_running * decay + mean * (1 - decay)))
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variance_running.assign(variance_running * decay +
                                        variance * (1 - decay)))

            # select mean, variance by training phase
            m, v = tf.cond(
                _phase,
                lambda: (mean, variance),                  # batch mean, variance
                lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameters
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # calculate layer mean, variance over the final axis
            mean, variance = tf.nn.moments(out,
                                           axes=[len(out.get_shape()) - 1],
                                           keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
            # apply offset, scale parameters
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout ( only in the training phase )
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func,
                               arg=tf.sg_opt(kwargs) + sg_get_context(),
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
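# `wrapper` closes over `func`, `_phase`, and friends, so it is the inner
# function of a layer decorator. A simplified, hedged sketch of the assumed
# enclosing pattern (`sg_layer_func` follows sugartensor's naming; the sketch
# elides the normalization/activation logic shown above, and `sg_my_scale` is
# a hypothetical layer for illustration only):
import functools

def sg_layer_func(func):
    @functools.wraps(func)
    def wrapper(tensor, **kwargs):
        # parse kwargs plus the ambient sg_context, then call the layer body
        opt = tf.sg_opt(kwargs) + sg_get_context()
        return func(tensor, opt)
    return wrapper

@sg_layer_func
def sg_my_scale(tensor, opt):
    # `opt` carries the parsed kwargs plus the defaults set by `wrapper`
    return tensor * opt.factor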
# encoder network
with tf.sg_context(name='encoder', size=4, stride=2, act='relu'):
    mu = (x
          .sg_conv(dim=64)
          .sg_conv(dim=128)
          .sg_flatten()
          .sg_dense(dim=1024)
          .sg_dense(dim=num_dim, act='linear'))

# re-parameterization trick with random gaussian
z = mu + tf.random_normal(mu.get_shape())

# decoder network
with tf.sg_context(name='decoder', size=4, stride=2, act='relu'):
    xx = (z
          .sg_dense(dim=1024)
          .sg_dense(dim=7 * 7 * 128)
          .sg_reshape(shape=(-1, 7, 7, 128))
          .sg_upconv(dim=64)
          .sg_upconv(dim=1, act='sigmoid'))

# add image summaries
tf.sg_summary_image(x, name='origin')
tf.sg_summary_image(xx, name='recon')

# loss
loss_recon = xx.sg_mse(target=x, name='recon').sg_mean(axis=[1, 2, 3])
loss_kld = tf.square(mu).sg_sum(axis=1) / (28 * 28)
tf.sg_summary_loss(loss_kld, name='kld')
loss = loss_recon + loss_kld * 0.5

# do training
tf.sg_train(loss=loss, log_interval=10, ep_size=data.train.num_batch,
            max_ep=30, early_stop=False, save_dir='asset/train/vae')
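# Why the KLD term above reduces to ||mu||^2: the encoder outputs a fixed
# unit-variance posterior q(z|x) = N(mu, I), since z = mu + eps with
# eps ~ N(0, I). Against the prior p(z) = N(0, I),
#
#     KL(q || p) = 0.5 * sum_i mu_i**2,
#
# and the 0.5 factor shows up later as `loss_kld * 0.5`. The division by
# 28 * 28 is, presumably, to keep the KLD on the same per-pixel scale as
# `loss_recon`, which averages the squared error over all 28x28 pixels.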
                             bn=False)
d_p4 = ops.upconv_and_scale(d_p3, dim=1, size=size, stride=stride,
                            act='linear', bn=False)
disc = d_p4

#
# pull-away term ( PT ) regularizer
#

sample = gen.sg_flatten()
nom = tf.matmul(sample, tf.transpose(sample, perm=[1, 0]))
denom = tf.reduce_sum(tf.square(sample), reduction_indices=[1],
                      keep_dims=True)
pt = tf.square(nom / denom)
pt -= tf.diag(tf.diag_part(pt))
pt = tf.reduce_sum(pt) / (batch_size * (batch_size - 1))

#
# loss & train ops
#

# mean squared errors
mse = tf.reduce_mean(tf.square(disc - xx), reduction_indices=[1, 2, 3])
mse_real, mse_fake = mse[:batch_size], mse[batch_size:]

loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)  # discriminator loss
loss_gen = mse_fake + pt * pt_weight  # generator loss + PT regularizer
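# For reference, the pull-away term in the EBGAN paper (Zhao et al., 2016)
# normalizes by both sample norms,
#
#     f_PT(S) = 1 / (N * (N - 1)) * sum_{i != j} (s_i . s_j / (||s_i|| * ||s_j||))**2,
#
# whereas the code above divides by ||s_i||**2 only (`denom` broadcasts
# row-wise). A hedged sketch of the paper-faithful variant, reusing `sample`
# and `batch_size` from the snippet above:
normalized = tf.nn.l2_normalize(sample, 1)      # s_i / ||s_i||
cos = tf.matmul(normalized, tf.transpose(normalized, perm=[1, 0]))
pt_paper = tf.square(cos)
pt_paper -= tf.diag(tf.diag_part(pt_paper))     # drop the i == j terms
pt_paper = tf.reduce_sum(pt_paper) / (batch_size * (batch_size - 1))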
# add image summaries
tf.sg_summary_image(x, name='real')
tf.sg_summary_image(gen, name='fake')

# discriminator
disc_real = discriminator(x)
disc_fake = discriminator(gen)

#
# pull-away term ( PT ) regularizer
#

sample = gen.sg_flatten()
nom = tf.matmul(sample, tf.transpose(sample, perm=[1, 0]))
denom = tf.reduce_sum(tf.square(sample), reduction_indices=[1],
                      keep_dims=True)
pt = tf.square(nom / denom)
pt -= tf.diag(tf.diag_part(pt))
pt = tf.reduce_sum(pt) / (batch_size * (batch_size - 1))

#
# loss & train ops
#

# mean squared errors
mse_real = tf.reduce_mean(tf.square(disc_real - x),
                          reduction_indices=[1, 2, 3])
mse_fake = tf.reduce_mean(tf.square(disc_fake - gen),
                          reduction_indices=[1, 2, 3])

# discriminator loss ( as in the snippet above )
loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)