def update_running_stat():
    decay = 0.99
    # update running mean / variance as exponential moving averages
    update_op = [
        mean_running.assign(mean_running * decay + mean * (1 - decay)),
        variance_running.assign(variance_running * decay + variance * (1 - decay))
    ]
    with tf.control_dependencies(update_op):
        return tf.identity(mean), tf.identity(variance)
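# --- Example (not part of the library): the update above is a standard
# exponential moving average; a plain-Python sketch with hypothetical
# values shows how the running statistic drifts toward the batch statistic.
decay = 0.99
running, batch_mean = 0.0, 10.0
for _ in range(3):
    running = running * decay + batch_mean * (1 - decay)
print(running)  # ~0.297 after three steps; approaches 10.0 as steps grow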
def sg_ce(tensor, opt):
    r"""Returns softmax cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels.
        one_hot: Boolean. Whether to treat the labels as one-hot encoding. Default is False.
        mask: Boolean. If True, zeros in the target will be excluded from the calculation.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` of the same shape as `tensor`, with the last dimension reduced.

    For example,

    ```
    tensor = [[[2, -1, 3], [3, 1, -2]]]
    target = [[2, 1]]
    tensor.sg_ce(target=target) => [[ 0.32656264  2.13284516]]
    ```

    For example,

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 0, 1], [1, 0, 0]]
    tensor.sg_ce(target=target, one_hot=True) => [ 0.32656264  0.13284527]
    ```
    """
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        out = tf.identity(
            tf.nn.softmax_cross_entropy_with_logits(labels=opt.target, logits=tensor), 'ce')
    else:
        out = tf.identity(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=opt.target, logits=tensor), 'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
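# --- Sanity check (not part of the library): the sparse example in the
# docstring above can be reproduced with the raw TF 1.x op that sg_ce
# wraps; assumes a fresh default graph.
import tensorflow as tf

logits = tf.constant([[[2., -1., 3.], [3., 1., -2.]]])
labels = tf.constant([[2, 1]], dtype=tf.int64)
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
with tf.Session() as sess:
    print(sess.run(ce))  # [[ 0.32656264  2.13284516]]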
def sg_mse(tensor, opt):
    r"""Returns squared error between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      target: A `Tensor` with the same shape and dtype as `tensor`.

    Returns:
      A `Tensor` of the same shape and dtype as `tensor`.

    For example,

    ```
    tensor = [[34, 11, 40], [13, 30, 42]]
    target = [[34, 10, 41], [14, 31, 40]]
    tensor.sg_mse(target=target) => [[ 0.  1.  1.]
                                     [ 1.  1.  4.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # squared error
    out = tf.identity(tf.square(tensor - opt.target), 'mse')

    # add summary
    tf.sg_summary_loss(out)

    return out
def ner_accuracy(tensor, opt):
    r"""Returns accuracy of predictions.

    Args:
      tensor: A `Tensor`. Probability distributions or unscaled prediction scores.
      opt:
        target: A `Tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `target`. Each value will be 1 if correct else 0.

    For example,

    ```
    tensor = [[20.1, 18, -4.2], [0.04, 21.1, 31.3]]
    target = [[0, 1]]
    tensor.sg_accuracy(target=target) => [[ 1.  0.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'
    opt += tf.sg_opt(k=1)

    # calc accuracy (labels are 1-indexed, so shift argmax by one)
    out = tf.identity(tf.equal(tensor.sg_argmax() + 1, tf.cast(opt.target, tf.int64)).sg_float(), name='acc')
    # out = tf.identity(tf.nn.in_top_k(tensor, opt.target, opt.k).sg_float(), name='acc')

    # masking padding: label 0 is treated as padding and counted as correct
    if opt.mask:
        out += tf.equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    return out
def sg_mae(tensor, opt):
    r"""Returns absolute error between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      opt:
        target: A `Tensor` with the same shape and dtype as `tensor`.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` of the same shape and dtype as `tensor`.

    For example,

    ```
    tensor = [[34, 11, 40], [13, 30, 42]]
    target = [[34, 10, 41], [14, 31, 40]]
    tensor.sg_mae(target=target) => [[ 0.  1.  1.]
                                     [ 1.  1.  2.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # absolute error
    out = tf.identity(tf.abs(tensor - opt.target), 'mae')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def sg_ce(tensor, opt):
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        # legacy (pre-TF-1.0) positional signature: (logits, labels)
        out = tf.identity(tf.nn.softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')
    else:
        out = tf.identity(tf.nn.sparse_softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_ctc(tensor, opt):
    r"""Computes the CTC (Connectionist Temporal Classification) loss between `tensor` and `target`.

    Args:
      tensor: A 3-D `float Tensor`.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels. ( Dense tensor )
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A 1-D `Tensor` with the same length in the first dimension as the `tensor`.

    For example,

    ```
    tensor = [[[2., -1., 3.], [3., 1., -2.]], [[1., -1., 2.], [3., 1., -2.]]]
    target = [[2., 1.], [2., 3.]]
    tensor.sg_ctc(target=target) => [ 4.45940781  2.43091154]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1], merge=True)

    # ctc loss
    out = tf.nn.ctc_loss(opt.target.sg_to_sparse(), tensor, opt.seq_len,
                         ctc_merge_repeated=opt.merge, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
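# --- Usage sketch (hypothetical shapes; assumes sugartensor's tensor-method
# injection, which is what lets the docstring call tensor.sg_ctc): sg_ctc
# takes batch-major [batch, time, classes] logits plus a dense label tensor
# and converts the labels to the SparseTensor that tf.nn.ctc_loss expects.
import sugartensor as tf

logits = tf.random_normal([2, 50, 28])            # 2 sequences, 50 frames, 27 symbols + blank
labels = tf.constant([[2, 1], [2, 3]], tf.int32)
loss = logits.sg_ctc(target=labels)               # 1-D tensor: one CTC loss per sequence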
def sg_pool1d(tensor, opt):
    r"""Performs the 1-D pooling on the `tensor`.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`. Default is 2.
        stride: A positive `integer`. The number of entries by which the filter is moved right
          at each step. Default is 2.
        avg: Boolean. If True, average pooling is applied. Otherwise, max pooling.
        name: If provided, replace current tensor's name.

    Returns:
      A tensor
    """
    # default stride and pad
    opt += tf.sg_opt(stride=2, pad='VALID')
    opt += tf.sg_opt(size=opt.stride)

    if opt.avg:
        out = tf.nn.avg_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)
    else:
        out = tf.nn.max_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)

    return tf.identity(out.sg_squeeze(dim=2), name=opt.name)
def sg_bce(tensor, opt):
    r"""Returns sigmoid cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      target: A `Tensor` with the same shape and dtype as `tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `tensor`.

    For example,

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 1, 1], [1, 1, 0]]
    tensor.sg_bce(target=target) => [[ 2.12692809  1.31326163  0.04858733]
                                     [ 0.04858733  0.31326166  0.12692805]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # legacy (pre-TF-1.0) positional signature: (logits, targets)
    out = tf.identity(tf.nn.sigmoid_cross_entropy_with_logits(tensor, opt.target), 'bce')

    # add summary
    tf.sg_summary_loss(out)

    return out
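# --- Sanity check (plain Python, not part of the library): TensorFlow
# computes sigmoid cross entropy for logit x and label z with the
# numerically stable form max(x, 0) - x*z + log(1 + exp(-|x|)); the first
# entry of the example above follows:
import math
x, z = 2.0, 0.0
print(max(x, 0.0) - x * z + math.log(1.0 + math.exp(-abs(x))))  # ~2.1269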
def sg_inverse_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target shape and channel num
    channel_factor = opt.factor * opt.factor

    # intermediate shape for shuffling (integer division keeps shape entries ints)
    shape_1 = [batch, row // opt.factor, col // opt.factor,
               channel_factor // opt.factor, channel_factor // opt.factor]
    shape_2 = [batch, row // opt.factor, col // opt.factor, channel_factor]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel):
        out.append(tensor[:, :, :, i].sg_expand_dims()
                   .sg_reshape(shape=shape_1)
                   .sg_transpose(perm=(0, 1, 3, 2, 4))
                   .sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)
def sg_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target channel num (integer division keeps shape entries ints)
    channel_target = channel // (opt.factor * opt.factor)
    channel_factor = channel // channel_target

    # intermediate shape for shuffling
    shape_1 = [batch, row, col,
               channel_factor // opt.factor, channel_factor // opt.factor]
    shape_2 = [batch, row * opt.factor, col * opt.factor, 1]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel_target):
        out.append((tensor[:, :, :, i * channel_factor:(i + 1) * channel_factor])
                   .sg_reshape(shape=shape_1)
                   .sg_transpose(perm=(0, 1, 3, 2, 4))
                   .sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)
def sg_ce(tensor, opt):
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        out = tf.identity(tf.nn.softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')
    else:
        out = tf.identity(tf.nn.sparse_softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_bce(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    out = tf.identity(tf.nn.sigmoid_cross_entropy_with_logits(tensor, opt.target), 'bce')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_mae(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # absolute error
    out = tf.identity(tf.abs(tensor - opt.target), 'mae')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_mse(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # squared error
    out = tf.identity(tf.square(tensor - opt.target), 'mse')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_accuracy(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'
    opt += tf.sg_opt(k=1)

    # calc accuracy
    out = tf.identity(tf.equal(tensor.sg_argmax(), tf.cast(opt.target, tf.int64)).sg_float(), name='acc')
    # out = tf.identity(tf.nn.in_top_k(tensor, opt.target, opt.k).sg_float(), name='acc')

    return out
def sg_identity(tensor, opt):
    r"""Returns the same tensor.

    Args:
      tensor: A `Tensor` (automatically given by chain).
      opt:
        name: If provided, it replaces current tensor's name.

    Returns:
      A `Tensor`. Has the same content as `tensor`.
    """
    return tf.identity(tensor, name=opt.name)
def sg_pool1d(tensor, opt):
    # default stride and pad
    opt += tf.sg_opt(stride=2, pad='VALID')
    opt += tf.sg_opt(size=opt.stride)

    if opt.avg:
        out = tf.nn.avg_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)
    else:
        out = tf.nn.max_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)

    return tf.identity(out.sg_squeeze(dim=2), name=opt.name)
def sg_ctc(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1])

    # ctc loss
    out = tf.nn.ctc_loss(tensor, opt.target.sg_to_sparse(), opt.seq_len, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_pool(tensor, opt):
    r"""Performs the 2-D pooling on the `tensor`.
    Mostly used with sg_conv().

    Args:
      tensor: A 4-D `Tensor` (automatically given by chain).
      opt:
        size: A tuple or list of integers of length 2 representing `[kernel height, kernel width]`.
          Can be an int if both values are the same.
          If not specified, the value of `stride` is used (i.e., (2, 2) under the default stride).
        stride: A tuple or list of integers of length 2 or 4 representing stride dimensions.
          If the length is 2, i.e., (a, b), the stride is `[1, a, b, 1]`.
          If the length is 4, i.e., (a, b, c, d), the stride is `[a, b, c, d]`.
          Can be an int, i.e., a, in which case the stride is `[1, a, a, 1]`.
          The default value is `[1, 2, 2, 1]`.
        avg: Boolean. If True, average pooling is applied. Otherwise, max pooling.
        name: If provided, replace current tensor's name.

    Returns:
      A `Tensor`. The pooled output tensor.
    """
    # default stride and pad
    opt += tf.sg_opt(stride=(1, 2, 2, 1), pad='VALID')

    # shape stride
    opt.stride = opt.stride if isinstance(opt.stride, (list, tuple)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # shape size
    opt += tf.sg_opt(size=opt.stride)
    opt.size = opt.size if isinstance(opt.size, (list, tuple)) else [1, opt.size, opt.size, 1]
    opt.size = [1, opt.size[0], opt.size[1], 1] if len(opt.size) == 2 else opt.size

    if opt.avg:
        out = tf.nn.avg_pool(tensor, opt.size, opt.stride, opt.pad)
    else:
        out = tf.nn.max_pool(tensor, opt.size, opt.stride, opt.pad)

    return tf.identity(out, name=opt.name)
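# --- Behavior sketch (hypothetical helper mirroring the normalization logic
# above, not part of the library): scalar and length-2 stride/size arguments
# are padded out to the 4-element NHWC form that tf.nn.max_pool/avg_pool require.
def _normalize(s):
    s = s if isinstance(s, (list, tuple)) else [1, s, s, 1]
    return [1, s[0], s[1], 1] if len(s) == 2 else list(s)

assert _normalize(2) == [1, 2, 2, 1]
assert _normalize((3, 2)) == [1, 3, 2, 1]
assert _normalize((1, 3, 2, 1)) == [1, 3, 2, 1]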
def sg_hinge(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # default margin
    opt += tf.sg_opt(margin=1)

    # reshape target
    shape = tensor.get_shape().as_list()
    broadcast_shape = [-1] + [1] * (len(shape) - 2) + [shape[-1]]
    target = tf.cast(tf.reshape(opt.target, broadcast_shape), tf.sg_floatx)

    # hinge loss
    out = tf.identity(tf.maximum(opt.margin - target * tensor, 0), 'hinge')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_periodic_shuffle(tensor, opt):
    r"""Periodic shuffle transformation for SubPixel CNN.
    (see [Shi et al. 2016](http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Shi_Real-Time_Single_Image_CVPR_2016_paper.pdf))

    Args:
      tensor: A tensor (automatically given by chain).
      opt:
        factor: factor to multiply shape by. Default is 2.
        name: If provided, it replaces current tensor's name.

    Returns:
      A tensor
    """
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target channel num
    channel_target = channel // (opt.factor * opt.factor)
    channel_factor = channel // channel_target

    # intermediate shape for shuffling
    shape_1 = [batch, row, col,
               channel_factor // opt.factor, channel_factor // opt.factor]
    shape_2 = [batch, row * opt.factor, col * opt.factor, 1]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel_target):
        out.append((tensor[:, :, :, i * channel_factor:(i + 1) * channel_factor])
                   .sg_reshape(shape=shape_1)
                   .sg_transpose(perm=(0, 1, 3, 2, 4))
                   .sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(axis=3, values=out)

    return tf.identity(out, name=opt.name)
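# --- Note (an observation, not part of the library): for 4-D NHWC input the
# periodic shuffle performs the same kind of pixel rearrangement as
# TensorFlow's built-in depth-to-space op (up to input channel ordering):
x = tf.random_normal([1, 4, 4, 16])
y = tf.depth_to_space(x, 2)  # [1, 8, 8, 4]; compare x.sg_periodic_shuffle(factor=2)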
def sg_hinge(tensor, opt):
    r"""Returns hinge loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      opt:
        target: A `Tensor`. Labels.
        margin: An int. Maximum margin. Default is 1.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor`.

    For example,

    ```
    tensor = [[30, 10, 40], [13, 30, 42]]
    target = [[0, 0, 1], [0, 1, 0]]
    tensor.sg_hinge(target=target) => [[ 1.  1.  0.]
                                       [ 1.  0.  1.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default margin
    opt += tf.sg_opt(margin=1)

    # reshape target
    shape = tensor.get_shape().as_list()
    broadcast_shape = [-1] + [1] * (len(shape) - 2) + [shape[-1]]
    target = tf.cast(tf.reshape(opt.target, broadcast_shape), tf.sg_floatx)

    # hinge loss
    out = tf.identity(tf.maximum(opt.margin - target * tensor, 0), 'hinge')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def sg_pool(tensor, opt):
    # default stride and pad
    opt += tf.sg_opt(stride=(1, 2, 2, 1), pad='VALID')

    # shape stride
    opt.stride = opt.stride if isinstance(opt.stride, (list, tuple)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # shape size
    opt += tf.sg_opt(size=opt.stride)
    opt.size = opt.size if isinstance(opt.size, (list, tuple)) else [1, opt.size, opt.size, 1]
    opt.size = [1, opt.size[0], opt.size[1], 1] if len(opt.size) == 2 else opt.size

    if opt.avg:
        out = tf.nn.avg_pool(tensor, opt.size, opt.stride, opt.pad)
    else:
        out = tf.nn.max_pool(tensor, opt.size, opt.stride, opt.pad)

    return tf.identity(out, name=opt.name)
def sg_ctc(tensor, opt):
    r"""Returns the CTC loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels. ( Dense tensor )

    Returns:
      A 1-D `Tensor` with the same length in the first dimension as the `tensor`.

    For example,

    ```
    tensor = [[[2., -1., 3.], [3., 1., -2.]], [[1., -1., 2.], [3., 1., -2.]]]
    target = [[2., 1.], [2., 3.]]
    tensor.sg_ctc(target=target) => [ 4.45940781  2.43091154]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1])

    # ctc loss
    out = tf.nn.ctc_loss(tensor, opt.target.sg_to_sparse(), opt.seq_len, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out)

    return out
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: automatically passed by decorator
      kwargs:
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float of range [0, 1). A dropout rate. Set to 0 by default.
        bias: Boolean. If True, biases are added. As a default, it is set to True.
        name: A name for the layer. As a default, the function name is assigned.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
    """
    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1], dout=0)
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not opt.ln)
    finally:
        pass

    # automatic layer naming
    if opt.name is None:

        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', 'lyr-')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'lyr-' prefix
    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary (only on first creation, not on reuse)
        if not scope.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def sg_identity(tensor, opt):
    return tf.identity(tensor, name=opt.name)
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not specified.
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float of range [0, 1). A dropout rate. Set to 0 by default.
        bias: Boolean. If True, biases are added. As a default, it is set to True.
        name: A name for the layer. As a default, the function name is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0)
        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:

        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # layer variables are created under the layer's variable scope
    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # running mean, variance
            mean_running = init.constant('mean', opt.dim, summary=False)
            variance_running = init.constant('variance', opt.dim, value=1, summary=False)

            # calc batch mean, variance
            mean, variance = tf.nn.moments(out, axes=list(range(len(out.get_shape()) - 1)))

            # update running mean, variance
            def update_running_stat():
                decay = 0.99
                update_op = [
                    mean_running.assign(mean_running * decay + mean * (1 - decay)),
                    variance_running.assign(variance_running * decay + variance * (1 - decay))
                ]
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            # select mean, variance by training phase
            m, v = tf.cond(_phase,
                           update_running_stat,  # updated running stat and batch mean, variance
                           lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
            # apply parameter
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        if not scope.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
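# --- Usage sketch (hypothetical layer; assumes sugartensor exposes this
# wrapper through the @tf.sg_layer_func decorator, as its built-in layers do):
import sugartensor as tf

@tf.sg_layer_func
def sg_my_dense(tensor, opt):
    # the wrapper has already filled opt.in_dim / opt.dim with defaults
    w = tf.get_variable('W', (opt.in_dim, opt.dim))
    return tf.matmul(tensor, w)

# the wrapper auto-numbers the scope ('my_dense_1', 'my_dense_2', ...),
# then applies bn/ln, activation, and dropout before tagging the node 'out':
# y = x.sg_my_dense(dim=128, bn=True, act='relu')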
def wrapper(tensor, **kwargs):
    import sg_initializer as init
    import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0)
        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:

        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + 'layers/' + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse):
        with tf.variable_scope(opt.name):

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # running mean, variance
                mean_running = init.constant('mean', opt.dim)
                variance_running = init.constant('variance', opt.dim, value=1)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(out, axes=range(len(out.get_shape()) - 1))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [
                        mean_running.assign(mean_running * decay + mean * (1 - decay)),
                        variance_running.assign(variance_running * decay + variance * (1 - decay))
                    ]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(_phase,
                               update_running_stat,  # updated running stat and batch mean, variance
                               lambda: (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase,
                              lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.reuse is None or not opt.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor, is_layer=True, name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def wrapper(tensor, **kwargs):
    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1], dout=0)
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not opt.ln)
    finally:
        pass

    # automatic layer naming
    if opt.name is None:

        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + 'layers/' + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse):
        with tf.variable_scope(opt.name):

            # call layer function
            out = func(tensor, opt)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase,
                              lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.reuse is None or not opt.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor, is_layer=True, name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

    return out