Example #1
    def __call__(self, x_t, state, size, scope=None, reuse_vars=False):

        (prev_c, prev_h) = state
        scope = scope or tf.get_variable_scope()

        # fetch the peephole weights already created in this scope
        with tf.variable_scope(scope, reuse=True):
            w_ic = tf.get_variable("w_ic")
            w_fc = tf.get_variable("w_fc")
            w_oc = tf.get_variable("w_oc")

        with tf.sg_context(dev=self._dev, reuse=reuse_vars):
            # input gate (peephole connection to the previous cell state)
            i = x_t.sg_conv1d_gpus(name="ix_", size=size) + \
                prev_h.sg_conv1d_gpus(name="ih_", size=size) + \
                prev_c * w_ic

            # forget gate (atrous convolution, peephole connection)
            f = x_t.sg_aconv1d_gpus(name="fx_", size=size) + \
                prev_h.sg_aconv1d_gpus(name="fh_", size=size) + \
                prev_c * w_fc

            # candidate cell state
            c = x_t.sg_conv1d_gpus(name="cx_", size=size) + \
                prev_h.sg_conv1d_gpus(name="ch_", size=size)

            # output gate (peephole connection to the previous cell state)
            o = x_t.sg_conv1d_gpus(name="ox_", size=size) + \
                prev_h.sg_conv1d_gpus(name="oh_", size=size) + \
                prev_c * w_oc

        # LSTM state update
        new_c = prev_c * tf.sigmoid(f) + tf.sigmoid(i) * self._activation(c)
        new_h = self._activation(new_c) * tf.sigmoid(o)

        return (new_c, new_h)
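For reference, the gate computations above appear to implement a convolutional LSTM update with peephole connections (the `w_ic`, `w_fc`, `w_oc` variables). As a hedged sketch, writing σ for the sigmoid, φ for `self._activation`, ⊙ for elementwise multiplication, and conv/aconv for the (atrous) 1-D convolutions created by `sg_conv1d_gpus` / `sg_aconv1d_gpus`:

$$
\begin{aligned}
i_t &= \mathrm{conv}(x_t) + \mathrm{conv}(h_{t-1}) + w_{ic} \odot c_{t-1} \\
f_t &= \mathrm{aconv}(x_t) + \mathrm{aconv}(h_{t-1}) + w_{fc} \odot c_{t-1} \\
\tilde{c}_t &= \mathrm{conv}(x_t) + \mathrm{conv}(h_{t-1}) \\
o_t &= \mathrm{conv}(x_t) + \mathrm{conv}(h_{t-1}) + w_{oc} \odot c_{t-1} \\
c_t &= \sigma(f_t) \odot c_{t-1} + \sigma(i_t) \odot \phi(\tilde{c}_t) \\
h_t &= \sigma(o_t) \odot \phi(c_t)
\end{aligned}
$$

Each conv/aconv term has its own kernel (the `ix_`, `ih_`, `fx_`, … variables).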
Example #2
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are of
    an orthogonal ndarray.
    
    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)
    
    Args:
      name: The name of new variable.
      shape: A tuple/list of integers. 
      scale: A Python scalar.
      dtype: Either float32 or float64.
      summary: If True, add this variable to the TensorBoard summary.
    
    Returns:
      A `Variable`.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale *
                                                q[:shape[0], :shape[1]],
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)
    return x
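A minimal usage sketch for `orthogonal`, assuming `sugartensor` is imported as `tf` and that this function lives in the `sg_initializer` module (as the layer wrappers further below suggest); the scope and shape are illustrative only:

import sugartensor as tf
from sugartensor import sg_initializer as init

with tf.variable_scope('dense'):
    # 256x256 weight matrix with scaled orthogonal initial values
    w = init.orthogonal('W', (256, 256), scale=1.1)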
Example #3
def constant(name, shape, value=0, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are `value` and shape is `shape`.

    Args:
      name: The name of new variable.
      shape: A tuple/list of integers or an integer. 
        If shape is an integer, it is converted to a list.
      value: A Python scalar. All elements of the initialized variable
        will be set to this value. Default is 0.
      dtype: The data type. Only floating point types are supported. Default is float32.
      summary: If True, add this variable to the TensorBoard summary.

    Returns:
      A `Variable`.

    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name,
                        shape,
                        dtype=dtype,
                        initializer=tf.constant_initializer(value))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)
    return x
Example #4
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are 
    random numbers based on uniform distribution.
    
    Note that the default value of `scale` (=0.05) is different from 
    the min/max values (=0.0, 1.0) of tf.random_uniform_initializer.
    
    Args:
      name: The name of the new variable.
      shape: A tuple/list of integers or an integer. 
        If shape is an integer, it's converted to a list.
      scale: A Python scalar. All initial values are in the range `[-scale, scale)`. Default is .05.
      dtype: The data type. Only floating point types are supported. Default is float32.
      summary: If True, add this variable to the TensorBoard summary.
    
    Returns:
      A `Variable`.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name,
                        shape,
                        dtype=dtype,
                        initializer=tf.random_uniform_initializer(
                            minval=-scale, maxval=scale))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)
    return x
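Likewise, a short hedged sketch of combining `uniform` and `constant` to create a dense layer's parameters (names and shapes are made up for illustration):

import sugartensor as tf
from sugartensor import sg_initializer as init

with tf.variable_scope('dense_1'):
    w = init.uniform('W', (784, 256), scale=0.05)   # entries drawn from [-0.05, 0.05)
    b = init.constant('b', 256)                     # all zeros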
Example #5
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale, dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #6
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Returns a random orthogonal initializer.
    See Saxe et al. 2014 `http://arxiv.org/pdf/1312.6120.pdf`
    
    Args:
      name: A string. The name of the new or existing variable.
      shape: A list or tuple of integers.
      scale: A Python scalar.
      dtype: Either float32 or float64.
    
    Returns:
      A `Tensor` variable.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale *
                                                q[:shape[0], :shape[1]],
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #7
def identity(name, dim, scale=1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are of
    an identity matrix.
    
    Note that the default value of `scale` (=0.05) is different from 
    the min/max values (=0.0, 1.0) of tf.random_uniform_initializer.
    
    For example,
    
    ```
    identity("identity", 3, 2) =>
    [[2. 0. 0.]
     [0. 2. 0.]
     [0. 0. 2.]]
    ```
    
    Args:
      name: The name of new variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: A Python scalar. The value on the diagonal.
      dtype: The type of the elements of the resulting tensor.
      summary: If True, add this variable to the TensorBoard summary.
    
    Returns:
      A 2-D `Variable`.
    """
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale,
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)
    return x
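For illustration only, a sketch of initializing a square hidden-to-hidden weight with a scaled identity via the function above (scope and variable names are hypothetical):

import sugartensor as tf
from sugartensor import sg_initializer as init

with tf.variable_scope('rnn_cell'):
    w_h2h = init.identity('w_h2h', 128, scale=1.0)   # 128x128, ones on the diagonal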
Example #8
def external(name, value, dtype=tf.sg_floatx, summary=True):
    r"""Creates a tensor variable of which initial values are `value`.
    
    For example,
    
    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```
    
    Args:
      name: The name of new variable.
      value: A constant value (or list) of output type `dtype`.
      dtype: The type of the elements of the resulting tensor.
      summary: If True, add this variable to the TensorBoard summary.
    
    Returns:
      A `Variable` with the same contents as `value`, cast to `dtype`.
    """
    # create variable
    x = tf.get_variable(name, initializer=tf.constant(value, dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse and summary:
        tf.sg_summary_param(x)
    return x
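A minimal sketch of `external`, e.g. for loading pre-computed values such as a NumPy array into a graph variable (the file name is hypothetical):

import numpy as np
import sugartensor as tf
from sugartensor import sg_initializer as init

pretrained = np.load('embedding.npy')        # hypothetical pre-trained weights
with tf.variable_scope('embed'):
    emb = init.external('lookup', pretrained)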
Example #9
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx):
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.random_uniform_initializer(minval=-scale, maxval=scale))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #10
def external(name, value, dtype=tf.sg_floatx):
    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(value, dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #11
def constant(name, shape, value=0, dtype=tf.sg_floatx):
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name, shape, dtype=dtype,
                        initializer=tf.constant_initializer(value))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #12
    def _linear(self, arys):
        # reuse the input-to-hidden / hidden-to-hidden weights created in this scope
        scope = tf.get_variable_scope()
        with tf.variable_scope(scope, reuse=True):
            w_i2h = tf.get_variable("w_i2h")
            w_h2h = tf.get_variable("w_h2h")
            w_b = tf.get_variable("w_b") if self._bias else 0
        i2h = tf.matmul(arys[0], w_i2h)
        h2h = tf.matmul(arys[1], w_h2h)
        out = i2h + h2h + w_b
        return out
Example #13
def constant(name, shape, value=0, dtype=tf.sg_floatx):
    r"""Returns an initializer of `shape` with all elements set to a scalar `value`.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name,
                        shape,
                        dtype=dtype,
                        initializer=tf.constant_initializer(value))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #14
def uniform(name, shape, scale=0.05, dtype=tf.sg_floatx):
    r"""Returns an initializer of random numbers based on uniform distribution.
    Note that the default value of `scale` (=0.05) is different from 
    the min/max values (=0.0, 1.0) of tf.random_uniform_initializer.
    """
    shape = shape if isinstance(shape, (tuple, list)) else [shape]
    x = tf.get_variable(name,
                        shape,
                        dtype=dtype,
                        initializer=tf.random_uniform_initializer(
                            minval=-scale, maxval=scale))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #15
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    # Saxe et al. 2014 ( http://arxiv.org/pdf/1312.6120.pdf )
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the one with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    # create variable
    x = tf.get_variable(name,
                        initializer=tf.constant(scale * q[:shape[0], :shape[1]], dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #16
def external(name, value, dtype=tf.sg_floatx):
    r"""Returns an initializer of `value`.
    Args:
      name: A string. The name of the new or existing variable.
      value: A constant value (or array) of output type `dtype`.
      dtype: The type of the elements of the resulting tensor. (optional)
    
    Returns:
      A `Tensor` variable.  
    """
    # create variable
    x = tf.get_variable(name, initializer=tf.constant(value, dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #17
def sg_summary_image(tensor, prefix=None, name=None):
    r"""Register `tensor` to summary report as `image`

    Args:
      tensor: A tensor to log as image
      prefix: A `string`. A prefix to display in the TensorBoard web UI.
      name: A `string`. A name to display in the TensorBoard web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    if not tf.get_variable_scope().reuse:
        tf.summary.image(name + '-im', tensor)
Example #18
def sg_summary_audio(tensor, sample_rate=16000, prefix=None, name=None):
    r"""Register `tensor` to summary report as audio

    Args:
      tensor: A `Tensor` to log as audio
      sample_rate: An int. Sample rate to report. Default is 16000.
      prefix: A `string`. A prefix to display in the TensorBoard web UI.
      name: A `string`. A name to display in the TensorBoard web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    if not tf.get_variable_scope().reuse:
        tf.summary.audio(name + '-au', tensor, sample_rate)
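A hedged usage sketch for the two summary helpers above, assuming they are exposed on the sugartensor namespace as `tf.sg_summary_image` and `tf.sg_summary_audio` (the placeholder shapes are illustrative):

import sugartensor as tf

images = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))   # NHWC image batch
audio = tf.placeholder(tf.float32, shape=(None, 16000))        # 1-second clips at 16 kHz

tf.sg_summary_image(images, prefix='input')
tf.sg_summary_audio(audio, sample_rate=16000, prefix='input')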
Example #19
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Returns an initializer of a 2-D identity tensor.
    
    Args:
      name: A string. The name of the new or existing variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: An int (optional). The value on the diagonal.
      dtype: A tensor datatype.
    
    Returns:
      A 2-D tensor variable with the value of `scale` on the diagonal and zeros elsewhere.
    """
    x = tf.get_variable(name,
                        initializer=tf.constant(np.eye(dim) * scale,
                                                dtype=dtype))
    # add summary
    if not tf.get_variable_scope().reuse:
        tf.sg_summary_param(x)
    return x
Example #20
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: automatically passed by decorator
          kwargs:
              in_dim: An integer. The size of input dimension, which is set to the last one by default.
              dim: An integer. The size of output dimension. Has the same value as in_dim by default.
              ln: Boolean. If True, layer normalization is applied.
              bias: Boolean. If True, biases are added. As a default, it is set to True
              name: A name for the layer. As a default, the function name is assigned.
              reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
                as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', 'lyr-')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'lyr-' prefix
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if not scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
Example #21
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.
        
        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            dout: A float in the range [0, 1). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True 
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope 
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (opt.bn and opt.ln), \
                'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # open a variable scope named after the layer
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # offset, scale parameter
                mean_running = init.constant('mean', opt.dim, summary=False)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 summary=False)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(
                    out, axes=list(range(len(out.get_shape()) - 1)))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [
                        mean_running.assign(mean_running * decay + mean *
                                            (1 - decay)),
                        variance_running.assign(variance_running * decay +
                                                variance * (1 - decay))
                    ]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(
                    _phase,
                    update_running_stat,  # updated running stat and batch mean, variance
                    lambda:
                    (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if not scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
Example #22
def _histogram(name, tensor):
    # summary writing is effectively disabled here; the original calls are kept commented out
    if not tf.get_variable_scope().reuse and not tf.sg_get_context().reuse:
        # val = gen_logging_ops.summary.scalar(name, tensor)
        # tf.add_to_collection(tf.GraphKeys.SUMMARIES, val)
        pass
Example #23
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            scale: If True, multiply by a trainable gamma variable. When the activation is
              linear (relu included), this can be disabled because it can be implicitly
              learned by the next layer. The default is True.
            dout: A float in the range [0, 1). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer: A string. One of None, 'l1' or 'l2'. The default is None.
            summary: If True, summaries are added. The default is True.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + sg_get_context()

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0,
                             summary=True,
                             scale=True)
            if opt.regularizer == 'l1':
                opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
            elif opt.regularizer == 'l2':
                opt.regularizer = lambda x: tf.square(
                    tf.reduce_mean(tf.square(x)))
            else:
                opt.regularizer = None

            assert not (opt.bn and opt.ln), \
                'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)
            out_shape = out.get_shape()

            # apply batch normalization
            if opt.bn:
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                gamma = init.constant('gamma',
                                      opt.dim,
                                      value=1,
                                      summary=opt.summary,
                                      trainable=opt.scale)

                # offset, scale parameter ( for inference )
                mean_running = init.constant('mean',
                                             opt.dim,
                                             trainable=False,
                                             summary=opt.summary)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 trainable=False,
                                                 summary=opt.summary)

                # use fused batch norm if ndims in [2, 3, 4]
                if out_shape.ndims in [2, 3, 4]:
                    # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                    if out_shape.ndims == 2:
                        out = tf.expand_dims(out, axis=1)
                        out = tf.expand_dims(out, axis=2)
                    elif out_shape.ndims == 3:
                        out = tf.expand_dims(out, axis=2)

                    fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5
                    out, mean, variance = tf.cond(
                        _phase,
                        lambda: tf.nn.fused_batch_norm(
                            out, gamma, beta, epsilon=fused_eps),
                        lambda: tf.nn.fused_batch_norm(out,
                                                       gamma,
                                                       beta,
                                                       mean=mean_running,
                                                       variance=
                                                       variance_running,
                                                       epsilon=fused_eps,
                                                       is_training=False),
                    )

                    # restore original shape if HW dims was added
                    if out_shape.ndims == 2:
                        out = tf.squeeze(out, axis=[1, 2])
                    elif out_shape.ndims == 3:
                        out = tf.squeeze(out, axis=2)

                # fallback to naive batch norm
                else:
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))
                    out = tf.cond(
                        _phase, lambda: tf.nn.batch_normalization(
                            out, mean, variance, beta, gamma, tf.sg_eps),
                        lambda: tf.nn.batch_normalization(
                            out, mean_running, variance_running, beta, gamma,
                            tf.sg_eps))

                decay = 0.99
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    mean_running.assign(mean_running * decay + mean *
                                        (1 - decay)))
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    variance_running.assign(variance_running * decay +
                                            variance * (1 - decay)))

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                if opt.scale:
                    gamma = init.constant('gamma',
                                          opt.dim,
                                          value=1,
                                          summary=opt.summary)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                if opt.scale:
                    out = gamma * out + beta
                else:
                    out = out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.summary:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + sg_get_context(),
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
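To show what this decorator provides to a layer author, here is a hedged sketch of calling a decorated layer with the kwargs parsed above; the `sg_dense` layer name follows the usual sugartensor convention and is assumed rather than taken from this snippet:

import sugartensor as tf

x = tf.placeholder(tf.float32, shape=(None, 784))

# batch-normalized, relu-activated dense layer; the wrapper handles automatic naming,
# normalization, activation, dropout and the activation summary
h = x.sg_dense(dim=256, act='relu', bn=True)
logit = h.sg_dense(dim=10)   # defaults: bias on, no normalization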
Example #24
def _scalar(name, tensor):
    if not tf.get_variable_scope().reuse and not tf.sg_get_context().reuse:
        val = gen_logging_ops._scalar_summary(name, tensor)
        tf.add_to_collection(tf.GraphKeys.SUMMARIES, val)
Example #25
    def wrapper(tensor, **kwargs):

        import sg_initializer as init
        import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (opt.bn and opt.ln), \
                'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply batch normalization
                if opt.bn:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # offset, scale parameter
                    mean_running = init.constant('mean', opt.dim)
                    variance_running = init.constant('variance',
                                                     opt.dim,
                                                     value=1)

                    # calc batch mean, variance
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))

                    # update running mean, variance
                    def update_running_stat():
                        decay = 0.99
                        update_op = [
                            mean_running.assign(mean_running * decay + mean *
                                                (1 - decay)),
                            variance_running.assign(variance_running * decay +
                                                    variance * (1 - decay))
                        ]
                        with tf.control_dependencies(update_op):
                            return tf.identity(mean), tf.identity(variance)

                    # select mean, variance by training phase
                    m, v = tf.cond(
                        _phase,
                        update_running_stat,  # updated running stat and batch mean, variance
                        lambda: (mean_running, variance_running)
                    )  # saved mean, variance

                    # apply batch normalization
                    out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                    tf.sg_eps)

                # apply layer normalization
                if opt.ln:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # calc layer mean, variance for final axis
                    mean, variance = tf.nn.moments(
                        out, axes=[len(out.get_shape()) - 1], keep_dims=True)

                    # apply normalization
                    out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                    # apply parameter
                    out = gamma * out + beta

                # apply activation
                if opt.act:
                    out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
Example #26
    def wrapper(tensor, **kwargs):

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out