Example #1
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: automatically passed by decorator
          kwargs:
              in_dim: An integer. The size of input dimension, which is set to the last one by default.
              dim: An integer. The size of output dimension. Has the same value as in_dim by default.
              ln: Boolean. If True, layer normalization is applied.
              dout: A float in the range [0, 1). A dropout rate. Set to 0 by default.
              bias: Boolean. If True, biases are added. Set to True by default.
              name: A name for the layer. As a default, the function name is assigned.
              reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
                as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', 'lyr-')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'lyr-' prefix
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if not scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
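
The decorated layer is typically invoked as a tensor method, with the kwargs listed in the docstring consumed by this wrapper. A minimal usage sketch, assuming the decorator has been applied to the library's `sg_dense` layer and that `sugartensor` is imported as `tf`, as in the library's own examples:

    import sugartensor as tf  # assumed import alias

    # a 784-d input; dim, ln, bias and name below are parsed by the wrapper above
    x = tf.placeholder(tf.float32, shape=(None, 784))
    h = x.sg_dense(dim=400, ln=True, name='hidden')  # bias disabled because ln=True
    y = h.sg_dense(dim=10)                           # auto-named, e.g. 'lyr-dense_1'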
Example #2
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.
        
        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            dout: A float in the range [0, 1). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. Set to True by default.
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope 
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (
                opt.bn and opt.ln
            ), 'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables are created under this layer's variable scope
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # running mean, variance (for inference)
                mean_running = init.constant('mean', opt.dim, summary=False)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 summary=False)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(
                    out, axes=list(range(len(out.get_shape()) - 1)))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [
                        mean_running.assign(mean_running * decay + mean *
                                            (1 - decay)),
                        variance_running.assign(variance_running * decay +
                                                variance * (1 - decay))
                    ]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(
                    _phase,
                    update_running_stat,  # updated running stat and batch mean, variance
                    lambda:
                    (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if not scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
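
The bn branch above keeps exponential moving averages of the batch statistics and switches between batch and running statistics with `tf.cond`. A standalone sketch of that mechanism under the TensorFlow 1.x API, with a boolean placeholder standing in for the module-level `_phase` flag:

    import tensorflow as tf  # TF 1.x API, as used by sugartensor

    is_training = tf.placeholder(tf.bool)             # stands in for _phase
    x = tf.placeholder(tf.float32, shape=(None, 8))

    mean_running = tf.Variable(tf.zeros([8]), trainable=False)
    variance_running = tf.Variable(tf.ones([8]), trainable=False)
    batch_mean, batch_variance = tf.nn.moments(x, axes=[0])

    def update_running_stat():
        decay = 0.99
        update_op = [
            mean_running.assign(mean_running * decay + batch_mean * (1 - decay)),
            variance_running.assign(variance_running * decay + batch_variance * (1 - decay))
        ]
        # return the batch statistics only after the running statistics are updated
        with tf.control_dependencies(update_op):
            return tf.identity(batch_mean), tf.identity(batch_variance)

    # training: batch statistics (plus EMA update); inference: running statistics
    m, v = tf.cond(is_training, update_running_stat,
                   lambda: (mean_running, variance_running))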
Example #3
    def wrapper(tensor, **kwargs):

        import sg_initializer as init
        import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (
                opt.bn and opt.ln
            ), 'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply batch normalization
                if opt.bn:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # running mean, variance (for inference)
                    mean_running = init.constant('mean', opt.dim)
                    variance_running = init.constant('variance',
                                                     opt.dim,
                                                     value=1)

                    # calc batch mean, variance
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))

                    # update running mean, variance
                    def update_running_stat():
                        decay = 0.99
                        update_op = [
                            mean_running.assign(mean_running * decay + mean *
                                                (1 - decay)),
                            variance_running.assign(variance_running * decay +
                                                    variance * (1 - decay))
                        ]
                        with tf.control_dependencies(update_op):
                            return tf.identity(mean), tf.identity(variance)

                    # select mean, variance by training phase
                    m, v = tf.cond(
                        _phase,
                        update_running_stat,  # updated running stat and batch mean, variance
                        lambda: (mean_running, variance_running)
                    )  # saved mean, variance

                    # apply batch normalization
                    out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                    tf.sg_eps)

                # apply layer normalization
                if opt.ln:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # calc layer mean, variance for final axis
                    mean, variance = tf.nn.moments(
                        out, axes=[len(out.get_shape()) - 1], keep_dims=True)

                    # apply normalization
                    out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                    # apply parameter
                    out = gamma * out + beta

                # apply activation
                if opt.act:
                    out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
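
The automatic naming logic scans existing variable names for the `layers/<name>` prefix and appends the next free index. A pure-Python sketch of the same numbering scheme, with a plain list of variable names standing in for `tf.get_collection(tf.GraphKeys.VARIABLES)` (function and argument names here are hypothetical):

    def next_layer_name(base, variable_names, scope_name=''):
        """Return base + '_<n>' where n is one past the highest existing index."""
        prefix = scope_name + '/' if scope_name else ''
        exist_layers = []
        for name in variable_names:
            i = name.rfind(prefix + 'layers/' + base)
            if i >= 0:
                # variable names look like '.../layers/conv_2/W:0'
                exist_layers.append(name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))
        if not exist_layers:
            return base + '_1'
        return base + '_%d' % (max(int(n.split('_')[-1]) for n in exist_layers) + 1)

    # next_layer_name('conv', ['layers/conv_1/W:0', 'layers/conv_2/b:0']) -> 'conv_3'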
Example #4
    def wrapper(tensor, **kwargs):

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
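
Dropout is only active during training; at inference the tensor passes through unchanged. A minimal sketch of the `tf.cond` pattern used above, assuming TensorFlow 1.x and a boolean placeholder in place of `_phase`:

    import tensorflow as tf  # TF 1.x API

    is_training = tf.placeholder(tf.bool)               # stands in for _phase
    out = tf.placeholder(tf.float32, shape=(None, 128))
    dout = 0.3                                          # dropout rate, as in opt.dout

    # tf.nn.dropout takes a keep probability, hence 1 - dout
    out = tf.cond(is_training,
                  lambda: tf.nn.dropout(out, 1 - dout),
                  lambda: out)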
Example #5
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            scale: Boolean. If True, multiply by a trainable gamma variable. When the activation is
              linear (relu included), this can be disabled because it can be implicitly
              learned by the next layer. The default is True.
            dout: A float in the range [0, 1). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. Set to True by default.
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer: A string: None, 'l1', or 'l2'. The default is None.
            summary: If True, summaries are added. The default is True.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + sg_get_context()

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0,
                             summary=True,
                             scale=True)
            if opt.regularizer == 'l1':
                opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
            elif opt.regularizer == 'l2':
                opt.regularizer = lambda x: tf.square(
                    tf.reduce_mean(tf.square(x)))
            else:
                opt.regularizer = None

            assert not (
                opt.bn and opt.ln
            ), 'only one of batch normalization and layer normalization can be applied.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)
            out_shape = out.get_shape()

            # apply batch normalization
            if opt.bn:
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                gamma = init.constant('gamma',
                                      opt.dim,
                                      value=1,
                                      summary=opt.summary,
                                      trainable=opt.scale)

                # running mean, variance (for inference)
                mean_running = init.constant('mean',
                                             opt.dim,
                                             trainable=False,
                                             summary=opt.summary)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 trainable=False,
                                                 summary=opt.summary)

                # use fused batch norm if ndims in [2, 3, 4]
                if out_shape.ndims in [2, 3, 4]:
                    # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                    if out_shape.ndims == 2:
                        out = tf.expand_dims(out, axis=1)
                        out = tf.expand_dims(out, axis=2)
                    elif out_shape.ndims == 3:
                        out = tf.expand_dims(out, axis=2)

                    fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5
                    out, mean, variance = tf.cond(
                        _phase,
                        lambda: tf.nn.fused_batch_norm(
                            out, gamma, beta, epsilon=fused_eps),
                        lambda: tf.nn.fused_batch_norm(out,
                                                       gamma,
                                                       beta,
                                                       mean=mean_running,
                                                       variance=variance_running,
                                                       epsilon=fused_eps,
                                                       is_training=False),
                    )

                    # restore original shape if HW dims were added
                    if out_shape.ndims == 2:
                        out = tf.squeeze(out, axis=[1, 2])
                    elif out_shape.ndims == 3:
                        out = tf.squeeze(out, axis=2)

                # fallback to naive batch norm
                else:
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))
                    out = tf.cond(
                        _phase, lambda: tf.nn.batch_normalization(
                            out, mean, variance, beta, gamma, tf.sg_eps),
                        lambda: tf.nn.batch_normalization(
                            out, mean_running, variance_running, beta, gamma,
                            tf.sg_eps))

                decay = 0.99
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    mean_running.assign(mean_running * decay + mean *
                                        (1 - decay)))
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    variance_running.assign(variance_running * decay +
                                            variance * (1 - decay)))

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                if opt.scale:
                    gamma = init.constant('gamma',
                                          opt.dim,
                                          value=1,
                                          summary=opt.summary)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                if opt.scale:
                    out = gamma * out + beta
                else:
                    out = out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.summary:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + sg_get_context(),
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
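
The ln branch normalizes over the last axis and then applies the learned offset and, optionally, the learned scale. A self-contained sketch of that computation under the TensorFlow 1.x API; the epsilon default here is an arbitrary stand-in for `tf.sg_eps`:

    import tensorflow as tf  # TF 1.x API

    def layer_norm(x, eps=1e-8, scale=True):
        """Minimal layer normalization over the last axis, mirroring the ln branch above."""
        dim = x.get_shape().as_list()[-1]
        beta = tf.get_variable('beta', [dim], initializer=tf.zeros_initializer())
        # normalize over the final axis only
        mean, variance = tf.nn.moments(x, axes=[len(x.get_shape()) - 1], keep_dims=True)
        out = (x - mean) / tf.sqrt(variance + eps)
        if scale:
            gamma = tf.get_variable('gamma', [dim], initializer=tf.ones_initializer())
            return gamma * out + beta
        return out + beta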