def __call__(self, x_t, state, size, scope=None, reuse_vars=False):
    (prev_c, prev_h) = state
    scope = scope or tf.get_variable_scope()

    # fetch the peephole weights created earlier in the enclosing scope
    with tf.variable_scope(scope, reuse=True):
        w_ic = tf.get_variable("w_ic")
        w_fc = tf.get_variable("w_fc")
        w_oc = tf.get_variable("w_oc")

    with tf.sg_context(dev=self._dev, reuse=reuse_vars):
        # gate pre-activations: input, forget, candidate and output
        i = x_t.sg_conv1d_gpus(name="ix_", size=size) + \
            prev_h.sg_conv1d_gpus(name="ih_", size=size) + \
            prev_c * w_ic
        f = x_t.sg_aconv1d_gpus(name="fx_", size=size) + \
            prev_h.sg_aconv1d_gpus(name="fh_", size=size) + \
            prev_c * w_fc
        c = x_t.sg_conv1d_gpus(name="cx_", size=size) + \
            prev_h.sg_conv1d_gpus(name="ch_", size=size)
        o = x_t.sg_conv1d_gpus(name="ox_", size=size) + \
            prev_h.sg_conv1d_gpus(name="oh_", size=size) + \
            prev_c * w_oc

    # LSTM state update with peephole connections
    new_c = prev_c * tf.sigmoid(f) + tf.sigmoid(i) * self._activation(c)
    new_h = self._activation(new_c) * tf.sigmoid(o)

    return (new_c, new_h)
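# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): the reuse=True lookup above assumes
# that w_ic, w_fc and w_oc were created earlier in the same variable scope.
# A minimal setup sketch; the shape and scope names below are hypothetical.
import tensorflow as tf  # TF 1.x graph-mode API, as used above

in_dim = 128  # assumed channel dimension; the real value depends on the model
with tf.variable_scope("clstm") as cell_scope:
    # peephole weights later read via tf.get_variable(..., reuse=True)
    w_ic = tf.get_variable("w_ic", shape=[in_dim], initializer=tf.zeros_initializer())
    w_fc = tf.get_variable("w_fc", shape=[in_dim], initializer=tf.zeros_initializer())
    w_oc = tf.get_variable("w_oc", shape=[in_dim], initializer=tf.zeros_initializer())

# later, inside the recurrence:
#   new_c, new_h = cell(x_t, (prev_c, prev_h), size, scope=cell_scope)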
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are of
    an orthogonal ndarray.

    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)

    Args:
      name: The name of new variable.
      shape: A tuple/list of integers.
      scale: A Python scalar.
      dtype: Either float32 or float64.
      summary: If True, add this constant to tensor board summary.

    Returns:
      A `Variable`.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)

    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale * q[:shape[0], :shape[1]],
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)

    return x
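# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): assumes sugartensor is installed and
# that this function lives in the sugartensor.sg_initializer module, matching
# the sg_ naming used above.
import numpy as np
import sugartensor as tf  # sugartensor wraps TensorFlow 1.x
from sugartensor import sg_initializer as init  # assumed import path

# create a 3x4 weight variable with an orthogonal initial value
w = init.orthogonal('w_example', (3, 4), scale=1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    val = sess.run(w)
    # rows are (approximately) orthonormal, so val.dot(val.T) is close to identity
    print(np.round(val.dot(val.T), 3))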
def constant(name, shape, value=0, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are `value`
    and shape is `shape`.

    Args:
      name: The name of new variable.
      shape: A tuple/list of integers or an integer.
        If shape is an integer, it is converted to a list.
      value: A Python scalar. All elements of the initialized variable
        will be set to this value. Default is 0.
      dtype: The data type. Only floating point types are supported. Default is float32.
      summary: If True, add this constant to tensor board summary.

    Returns:
      A `Variable`.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.constant_initializer(value))

    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)

    return x
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are
    random numbers based on uniform distribution.

    Note that the default value of `scale` (=0.05) is different from
    the min/max values (=0.0, 1.0) of tf.random_uniform_initializer.

    Args:
      name: The name of the new variable.
      shape: A tuple/list of integers or an integer.
        If shape is an integer, it's converted to a list.
      scale: A Python scalar. All initial values should be
        in range `[-scale, scale)`. Default is .05.
      dtype: The data type. Only floating point types are supported. Default is float32.
      summary: If True, add this constant to tensor board summary.

    Returns:
      A `Variable`.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))

    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)

    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Returns a random orthogonal initializer.

    See Saxe et al. 2014 `http://arxiv.org/pdf/1312.6120.pdf`

    Args:
      name: A string. The name of the new or existing variable.
      shape: A list or tuple of integers.
      scale: A Python scalar.
      dtype: A float32 or float64.

    Returns:
      A `Tensor` variable.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)

    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale * q[:shape[0], :shape[1]],
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are of
    an identity matrix.

    For example,

    ```
    identity("identity", 3, 2)
    => [[2. 0. 0.]
        [0. 2. 0.]
        [0. 0. 2.]]
    ```

    Args:
      name: The name of new variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: A Python scalar. The value on the diagonal. Default is 1.
      dtype: The type of the elements of the resulting tensor.
      summary: If True, add this constant to tensor board summary.

    Returns:
      A 2-D `Variable`.
    """
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)

    return x
def external(name, value, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are `value`.

    For example,

    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```

    Args:
      name: The name of new variable.
      value: A constant value (or list) of output type `dtype`.
      dtype: The type of the elements of the resulting tensor.
      summary: If True, add this constant to tensor board summary.

    Returns:
      A `Variable`. Has the same contents as `value` of `dtype`.
    """
    # create variable
    x = tf.get_variable(name, initializer=tf.constant(value, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)

    return x
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx):
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def external(name, value, dtype=tf.sg_floatx):
    # create variable
    x = tf.get_variable(name, initializer=tf.constant(value, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def constant(name, shape, value=0, dtype=tf.sg_floatx):
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.constant_initializer(value))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def _linear(self, arys):
    # look up the affine parameters created in the enclosing scope
    scope = tf.get_variable_scope()
    with tf.variable_scope(scope, reuse=True):
        w_i2h = tf.get_variable("w_i2h")
        w_h2h = tf.get_variable("w_h2h")
        w_b = tf.get_variable("w_b") if self._bias else 0

    # input-to-hidden and hidden-to-hidden projections
    i2h = tf.matmul(arys[0], w_i2h)
    h2h = tf.matmul(arys[1], w_h2h)
    out = i2h + h2h + w_b

    return out
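# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): _linear only reads variables, so the
# weights must exist before it runs. A minimal setup sketch with hypothetical
# dimensions and scope name.
import tensorflow as tf  # TF 1.x graph-mode API

in_dim, hid_dim = 64, 128  # hypothetical sizes
with tf.variable_scope("rnn_cell"):
    w_i2h = tf.get_variable("w_i2h", shape=[in_dim, hid_dim])
    w_h2h = tf.get_variable("w_h2h", shape=[hid_dim, hid_dim])
    w_b = tf.get_variable("w_b", shape=[hid_dim], initializer=tf.zeros_initializer())

# _linear([x, h]) then computes x @ w_i2h + h @ w_h2h + w_b under reuse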
def constant(name, shape, value=0, dtype=tf.sg_floatx):
    r"""Returns an initializer of `shape` with all elements set to a scalar `value`.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.constant_initializer(value))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx):
    r"""Returns an initializer of random numbers based on uniform distribution.

    Note that the default value of `scale` (=0.05) is different from
    the min/max values (=0.0, 1.0) of tf.random_uniform_initializer.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    # Saxe et al. ( http://arxiv.org/pdf/1312.6120.pdf )
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)

    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale * q[:shape[0], :shape[1]],
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def external(name, value, dtype=tf.sg_floatx):
    r"""Returns an initializer of `value`.

    Args:
      name: A string. The name of the new or existing variable.
      value: A constant value (or array) of output type `dtype`.
      dtype: The type of the elements of the resulting tensor. (optional)

    Returns:
      A `Tensor` variable.
    """
    # create variable
    x = tf.get_variable(name, initializer=tf.constant(value, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def sg_summary_image(tensor, prefix=None, name=None):
    r"""Register `tensor` to summary report as `image`

    Args:
      tensor: A tensor to log as image
      prefix: A `string`. A prefix to display in the tensor board web UI.
      name: A `string`. A name to display in the tensor board web UI.

    Returns:
      None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    if not tf.get_variable_scope().reuse:
        tf.summary.image(name + '-im', tensor)
def sg_summary_audio(tensor, sample_rate=16000, prefix=None, name=None):
    r"""Register `tensor` to summary report as audio

    Args:
      tensor: A `Tensor` to log as audio
      sample_rate: An int. Sample rate to report. Default is 16000.
      prefix: A `string`. A prefix to display in the tensor board web UI.
      name: A `string`. A name to display in the tensor board web UI.

    Returns:
      None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    if not tf.get_variable_scope().reuse:
        tf.summary.audio(name + '-au', tensor, sample_rate)
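# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): assumes sugartensor exposes these
# helpers as tf.sg_summary_image / tf.sg_summary_audio after import, matching
# the sg_ naming above. Tensor shapes follow tf.summary.image / audio
# conventions and are placeholders for illustration only.
import sugartensor as tf  # sugartensor wraps TensorFlow 1.x

images = tf.zeros([8, 28, 28, 1])   # NHWC batch of images
waves = tf.zeros([2, 16000])        # two 1-second clips at 16 kHz

tf.sg_summary_image(images, prefix='train')
tf.sg_summary_audio(waves, sample_rate=16000, name='wave')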
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Returns an initializer of a 2-D identity tensor.

    Args:
      name: A string. The name of the new or existing variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: An int (optional). The value on the diagonal.
      dtype: A tensor datatype.

    Returns:
      A 2-D tensor variable with the value of `scale` on the diagonal
      and zeros elsewhere.
    """
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale, dtype=dtype))

    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)

    return x
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: automatically passed by decorator
      kwargs:
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        ln: Boolean. If True, layer normalization is applied.
        bias: Boolean. If True, biases are added. As a default, it is set to True.
        name: A name for the layer. As a default, the function name is assigned.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
    """
    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1], dout=0)
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not opt.ln)
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', 'lyr-')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'lyr-' prefix
    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary ( only when the scope is not reused )
        if not scope.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not specified.
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float of range [0, 1). A dropout rate. Set to 0 by default.
        bias: Boolean. If True, biases are added. As a default, it is set to True.
        name: A name for the layer. As a default, the function name is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0)
        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'lyr-' prefix
    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # running mean, variance ( for inference )
            mean_running = init.constant('mean', opt.dim, summary=False)
            variance_running = init.constant('variance', opt.dim, value=1, summary=False)

            # calc batch mean, variance
            mean, variance = tf.nn.moments(out, axes=range(len(out.get_shape()) - 1))

            # update running mean, variance
            def update_running_stat():
                decay = 0.99
                update_op = [mean_running.assign(mean_running * decay + mean * (1 - decay)),
                             variance_running.assign(variance_running * decay +
                                                     variance * (1 - decay))]
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            # select mean, variance by training phase
            m, v = tf.cond(_phase,
                           update_running_stat,  # updated running stat and batch mean, variance
                           lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=False)
            gamma = init.constant('gamma', opt.dim, value=1, summary=False)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
            # apply parameter
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        if not scope.reuse:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def _histogram(name, tensor):
    if not tf.get_variable_scope().reuse and not tf.sg_get_context().reuse:
        # histogram summary is intentionally disabled here;
        # the original summary op is kept for reference
        # val = gen_logging_ops.summary.scalar(name, tensor)
        # tf.add_to_collection(tf.GraphKeys.SUMMARIES, val)
        pass
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape: A list of integers. The shape of `tensor`. Inferred if not specified.
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        scale: If True, multiply by a trainable gamma variable. When the activation is
          linear (relu included), this can be disabled because it can be implicitly
          learned by the next layer. The default is True.
        dout: A float of range [0, 1). A dropout rate. Set to 0 by default.
        bias: Boolean. If True, biases are added. As a default, it is set to True.
        name: A name for the layer. As a default, the function name is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        regularizer: A string. None, 'l1' or 'l2'. The default is None.
        summary: If True, summaries are added. The default is True.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + sg_get_context()

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0, summary=True, scale=True)

        if opt.regularizer == 'l1':
            opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
        elif opt.regularizer == 'l2':
            opt.regularizer = lambda x: tf.square(tf.reduce_mean(tf.square(x)))
        else:
            opt.regularizer = None

        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)
        out_shape = out.get_shape()

        # apply batch normalization
        if opt.bn:
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            gamma = init.constant('gamma', opt.dim, value=1,
                                  summary=opt.summary, trainable=opt.scale)

            # offset, scale parameter ( for inference )
            mean_running = init.constant('mean', opt.dim,
                                         trainable=False, summary=opt.summary)
            variance_running = init.constant('variance', opt.dim, value=1,
                                             trainable=False, summary=opt.summary)

            # use fused batch norm if ndims in [2, 3, 4]
            if out_shape.ndims in [2, 3, 4]:
                # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                if out_shape.ndims == 2:
                    out = tf.expand_dims(out, axis=1)
                    out = tf.expand_dims(out, axis=2)
                elif out_shape.ndims == 3:
                    out = tf.expand_dims(out, axis=2)

                fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5

                out, mean, variance = tf.cond(
                    _phase,
                    lambda: tf.nn.fused_batch_norm(out, gamma, beta, epsilon=fused_eps),
                    lambda: tf.nn.fused_batch_norm(out, gamma, beta,
                                                   mean=mean_running,
                                                   variance=variance_running,
                                                   epsilon=fused_eps,
                                                   is_training=False),
                )

                # restore original shape if HW dims was added
                if out_shape.ndims == 2:
                    out = tf.squeeze(out, axis=[1, 2])
                elif out_shape.ndims == 3:
                    out = tf.squeeze(out, axis=2)

            # fallback to naive batch norm
            else:
                mean, variance = tf.nn.moments(out, axes=list(range(len(out.get_shape()) - 1)))
                out = tf.cond(
                    _phase,
                    lambda: tf.nn.batch_normalization(out, mean, variance, beta, gamma, tf.sg_eps),
                    lambda: tf.nn.batch_normalization(out, mean_running, variance_running,
                                                      beta, gamma, tf.sg_eps))

            # register running stat updates on the UPDATE_OPS collection
            decay = 0.99
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                mean_running.assign(mean_running * decay + mean * (1 - decay)))
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variance_running.assign(variance_running * decay + variance * (1 - decay)))

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            if opt.scale:
                gamma = init.constant('gamma', opt.dim, value=1, summary=opt.summary)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
            # apply parameter
            if opt.scale:
                out = gamma * out + beta
            else:
                out = out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        if opt.summary:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + sg_get_context(),
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
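# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): this wrapper decorates sugartensor's
# layer functions, so its kwargs surface as keyword arguments on each layer
# call. The example below assumes sugartensor's sg_dense layer and its
# Tensor-method injection; dimensions and names are arbitrary.
import sugartensor as tf  # sugartensor wraps TensorFlow 1.x

x = tf.placeholder(tf.float32, shape=[None, 100])

# the wrapper parses dim/act/bn, auto-names the scope (e.g. 'dense_1'),
# applies batch normalization and the activation, and registers summaries
h = x.sg_dense(dim=256, act='relu', bn=True)

# reuse=True with an explicit name shares the same variables for a second input
x2 = tf.placeholder(tf.float32, shape=[None, 100])
h2 = x2.sg_dense(dim=256, act='relu', bn=True, name='dense_1', reuse=True)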
def _scalar(name, tensor):
    if not tf.get_variable_scope().reuse and not tf.sg_get_context().reuse:
        val = gen_logging_ops._scalar_summary(name, tensor)
        tf.add_to_collection(tf.GraphKeys.SUMMARIES, val)
def wrapper(tensor, **kwargs):
    import sg_initializer as init
    import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0)
        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + 'layers/' + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse):
        with tf.variable_scope(opt.name):

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # running mean, variance
                mean_running = init.constant('mean', opt.dim)
                variance_running = init.constant('variance', opt.dim, value=1)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(out, axes=range(len(out.get_shape()) - 1))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [mean_running.assign(mean_running * decay + mean * (1 - decay)),
                                 variance_running.assign(variance_running * decay +
                                                         variance * (1 - decay))]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(_phase,
                               update_running_stat,  # updated running stat and batch mean, variance
                               lambda: (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase,
                              lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.reuse is None or not opt.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor, is_layer=True, name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def wrapper(tensor, **kwargs):
    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1], dout=0)
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not opt.ln)
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + 'layers/' + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse):
        with tf.variable_scope(opt.name):

            # call layer function
            out = func(tensor, opt)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase,
                              lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.reuse is None or not opt.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor, is_layer=True, name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
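# ---------------------------------------------------------------------------
# Usage sketch (not part of the library): the _sugar bookkeeping and injected
# sg_reuse method above are what allow a built subgraph to be replayed on a
# different input with shared variables. The example assumes sugartensor's
# sg_conv / sg_flatten / sg_dense layers and its sg_reuse helper; the
# discriminator-style setup and shapes are hypothetical.
import sugartensor as tf  # sugartensor wraps TensorFlow 1.x

x_real = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
x_fake = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])

# build a small stack once; each layer records func/args in out._sugar
logit_real = (x_real
              .sg_conv(dim=16, act='relu')
              .sg_flatten()
              .sg_dense(dim=1))

# replay the same layer chain, with shared variables, on another input
logit_fake = logit_real.sg_reuse(input=x_fake)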