Example #1
    def __init__(self, batch_size=128, reshape=False, one_hot=False):

        # load sg_data set
        data_set = input_data.read_data_sets(Mnist._data_dir,
                                             reshape=reshape,
                                             one_hot=one_hot)

        self.batch_size = batch_size

        # save each sg_data set
        _train = data_set.train
        _valid = data_set.validation
        _test = data_set.test

        # member initialize
        self.train, self.valid, self.test = tf.sg_opt(), tf.sg_opt(), tf.sg_opt()

        # convert to tensor queue
        self.train.image, self.train.label = \
            _data_to_tensor([_train.images, _train.labels.astype('int32')], batch_size, name='train')
        self.valid.image, self.valid.label = \
            _data_to_tensor([_valid.images, _valid.labels.astype('int32')], batch_size, name='valid')
        self.test.image, self.test.label = \
            _data_to_tensor([_test.images, _test.labels.astype('int32')], batch_size, name='test')

        # calc total batch count
        self.train.num_batch = _train.labels.shape[0] // batch_size
        self.valid.num_batch = _valid.labels.shape[0] // batch_size
        self.test.num_batch = _test.labels.shape[0] // batch_size
Example #2
def sg_pool1d(tensor, opt):
    r"""Performs the 1-D pooling on the `tensor`.
    
    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`.
          Default is 2.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step. Default is 2.
        avg: Boolean. If True, average pooling is applied. Otherwise, max pooling.
        name: If provided, replaces the current tensor's name.

    Returns:
        A `Tensor`.

    """

    # default stride and pad
    opt += tf.sg_opt(stride=2, pad='VALID')
    opt += tf.sg_opt(size=opt.stride)

    if opt.avg:
        out = tf.nn.avg_pool(tensor.sg_expand_dims(dim=2), (1, opt.size, 1, 1),
                             (1, opt.stride, 1, 1), opt.pad)
    else:
        out = tf.nn.max_pool(tensor.sg_expand_dims(dim=2), (1, opt.size, 1, 1),
                             (1, opt.stride, 1, 1), opt.pad)

    return tf.identity(out.sg_squeeze(dim=2), name=opt.name)
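
A minimal usage sketch for the pooling op above. It assumes the usual `import sugartensor as tf`, which also injects the `sg_*` ops as tensor methods; the shapes are illustrative only.

import sugartensor as tf

# dummy 3-D input: (batch, time, channel)
x = tf.placeholder(tf.sg_floatx, shape=(16, 100, 64))

# max pooling with kernel 2 and stride 2 along the time axis -> (16, 50, 64)
y_max = x.sg_pool1d(size=2, stride=2)

# average pooling instead of max pooling
y_avg = x.sg_pool1d(size=2, stride=2, avg=True)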
Example #3
def sg_optim(loss, **kwargs):
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp',
                     lr=0.001,
                     beta1=0.9,
                     beta2=0.99,
                     category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr,
                                                beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr,
                                               beta1=opt.beta1,
                                               beta2=opt.beta2)
    else:
        # fall back to plain gradient descent for any other optimizer name
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    var_list = [
        t for t in tf.trainable_variables()
        if t.name.startswith(opt.category)
    ]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summary ( compute_gradients returns (gradient, variable) pairs )
    for g, v in gradient:
        tf.sg_summary_gradient(v, g)

    # gradient update op
    return optim.apply_gradients(gradient, global_step=tf.sg_global_step())
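
A short sketch of calling this optimizer helper directly (the library exposes it as `tf.sg_optim`); `logit` and `y` stand for whatever graph and labels produced the loss and are illustrative.

# cross-entropy loss from some logits and integer labels
loss = logit.sg_ce(target=y)

# update op using the MaxProp optimizer with its default hyper-parameters
train_op = tf.sg_optim(loss, optim='MaxProp', lr=0.001, beta2=0.99)

# train_op can then be run in a plain session loop or driven by sg_train below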
Example #4
def sg_reuse(tensor, **opt):
    opt = tf.sg_opt(opt)
    assert hasattr(tensor, '_sugar'), 'cannot reuse this node.'
    assert opt.input is not None, 'input is mandatory.'

    # get all nodes in this graph
    nodes, prev = [tensor], tensor._sugar.prev
    while prev is not None:
        nodes = [prev] + nodes
        prev = prev._sugar.prev if hasattr(prev, '_sugar') else None

    # create graph again for this input
    out = opt.input
    for node in nodes[1:]:  # exclude head node
        if node._sugar.is_layer:
            fn = tf.sg_layer_func(node._sugar.func)
            if node._sugar.arg.context_name:
                with tf.variable_scope(node._sugar.arg.context_name):
                    out = fn(
                        out,
                        **(node._sugar.arg +
                           tf.sg_opt(name=node._sugar.name, reuse=True)))
            else:
                out = fn(
                    out,
                    **(node._sugar.arg +
                       tf.sg_opt(name=node._sugar.name, reuse=True)))
        else:
            out = node._sugar.func(out, node._sugar.arg)

    return out
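
A brief sketch of graph reuse with a second input, in the style of the library's GAN examples; `x_real` and `x_fake` are illustrative tensors of the same shape.

# discriminator graph built once on the real input
disc_real = (x_real
             .sg_dense(dim=400, act='relu', bn=True, name='fc1')
             .sg_dense(dim=1, name='fc2'))

# rebuild the same chain, with shared variables, on the fake input
disc_fake = disc_real.sg_reuse(input=x_fake)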
Example #5
def sg_train(**kwargs):
    r"""Trains the model.

    Args:
      **kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', or 'sgd'.
        loss: A 0-D `Tensor` containing the value to minimize.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.

        eval_metric: A list of tensors containing the value to evaluate. Default is [].
        early_stop: Boolean. If True (default), the training should stop when the following two conditions are met.
          i. Current loss is less than .95 * previous loss.
          ii. Current learning rate is less than 5e-6.
        lr_reset: Boolean. If True, the learning rate is reset to opt.lr when training restarts.
          Otherwise (default), the stored value of `_learning_rate` is kept.
        save_dir: A string. The root path to which checkpoint and log files are saved.
          Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.    
        ep_size: A positive integer. Total number of batches in an epoch,
          used for proper log display. Default is 1e5.

        save_interval: A Python scalar. The interval of saving checkpoint files.
          By default, for every 600 seconds, a checkpoint file is written.
        log_interval: A Python scalar. The interval of recording logs.
          By default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.

        tqdm: Boolean. If True (Default), progress bars are shown.
        console_log: Boolean. If True, a series of loss values is shown
          on the console instead of TensorBoard. Default is False.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp',
                     lr=0.001,
                     beta1=0.9,
                     beta2=0.99,
                     category='')

    # get optimizer
    train_op = sg_optim(opt.loss,
                        optim=opt.optim,
                        lr=_learning_rate,
                        beta1=opt.beta1,
                        beta2=opt.beta2,
                        category=opt.category)

    # define train function
    # noinspection PyUnusedLocal
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([opt.loss, train_op])[0]

    # run train function
    train_func(**opt)
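
For context, a compact end-to-end sketch in the spirit of the sugartensor MNIST example; the dataset class is the one from Example #1, and the layer sizes and epoch count are illustrative.

import sugartensor as tf

# MNIST input queue (see Example #1)
data = tf.sg_data.Mnist(batch_size=128)
x, y = data.train.image, data.train.label

# simple fully-connected classifier
logit = (x.sg_flatten()
          .sg_dense(dim=400, act='relu', bn=True)
          .sg_dense(dim=10))

# cross-entropy loss; sg_train builds the optimizer and runs the training loop
loss = logit.sg_ce(target=y)
tf.sg_train(loss=loss, ep_size=data.train.num_batch, max_ep=10, save_dir='asset/train')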
Example #6
def sg_train(**kwargs):
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp',
                     lr=0.001,
                     beta1=0.9,
                     beta2=0.99,
                     category='')

    # get optimizer
    train_op = sg_optim(opt.loss,
                        optim=opt.optim,
                        lr=_learning_rate,
                        beta1=opt.beta1,
                        beta2=opt.beta2,
                        category=opt.category)

    # define train function
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([opt.loss, train_op])[0]

    # run train function
    train_func(**opt)
Example #7
 def wrapper(tensor, **kwargs):
     # call sugar function
     out = func(tensor, tf.sg_opt(kwargs))
     # save node info for reuse
     out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs)+sg_get_context(), prev=tensor)
     # inject reuse function
     out.sg_reuse = types.MethodType(sg_reuse, out)
     return out
Example #8
def classifier_train(**kwargs):
    r"""Trains the model.

    Args:
      **kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
        loss: A 0-D `Tensor` containing the value to minimize.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        clip_grad_norm: A Python Scalar (optional). Default is 10.

        save_dir: A string. The root path to which checkpoint and log files are saved.
          Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.    
        ep_size: A positive integer. Total number of batches in an epoch,
          used for proper log display. Default is 1e5.

        save_interval: A Python scalar. The interval of saving checkpoint files.
          By default, for every 600 seconds, a checkpoint file is written.
        log_interval: A Python scalar. The interval of recording logs.
          By default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.

        category: Scope name or list to train

        eval_metric: A list of tensors containing the value to evaluate. Default is [].

        tqdm: Boolean. If True (default), progress bars are shown. If False, a series of loss
            values is shown on the console.

    """
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='', ep_size=100000, clip_grad_norm=10)

    # get optimizer
    train_op = sg_optim(opt.loss, optim=opt.optim, lr=opt.lr, clip_grad_norm=opt.clip_grad_norm,
                        beta1=opt.beta1, beta2=opt.beta2, category=opt.category)

    # for console logging
    loss_ = opt.loss

    # use only first loss when multiple GPU case
    if isinstance(opt.loss, (tuple, list)):
        loss_ = opt.loss[0]

    # define train function
    # noinspection PyUnusedLocal
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([loss_, train_op])[0]

    # run train function
    train_func(**opt)
Example #9
    def wrapper(**kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          **kwargs:
            source: A source queue list to enqueue
            dtypes: Data types of each tensor
            capacity: Queue capacity. Default is 32.
            num_threads: Number of threads. Default is 1.
        """

        # default option
        opt = tf.sg_opt(kwargs) + tf.sg_opt(
            dtypes=[tf.sg_floatx], capacity=32, num_threads=1)

        # source queue list check
        assert opt.source is not None, 'source is mandatory.'
        if type(opt.source) is not list and type(opt.source) is not tuple:
            opt.source = [opt.source]
        if type(opt.dtypes) is not list and type(opt.dtypes) is not tuple:
            opt.dtypes = [opt.dtypes]
        assert len(opt.source) == len(
            opt.dtypes), 'Source and dtypes should have same length.'

        # enqueue function
        def enqueue_func(sess, op):
            # read data from source queue
            data = func(sess.run(opt.source))
            # create feeder dict
            feed_dict = {}
            for ph, col in zip(placeholders, data):
                feed_dict[ph] = col
            # run session
            sess.run(op, feed_dict=feed_dict)

        # create place holder list
        placeholders = []
        for dtype in opt.dtypes:
            placeholders.append(tf.placeholder(dtype=dtype))

        # create FIFO queue
        queue = tf.FIFOQueue(opt.capacity, dtypes=opt.dtypes)

        # enqueue operation
        enqueue_op = queue.enqueue(placeholders)

        # create queue runner
        runner = _FuncQueueRunner(enqueue_func, queue,
                                  [enqueue_op] * opt.num_threads)

        # register to global collection
        tf.train.add_queue_runner(runner)

        # return de-queue operation
        return queue.dequeue()
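
The wrapper above matches the body of the library's producer decorator (exposed as `tf.sg_producer_func`). A rough usage sketch, assuming that decorator; the file names and loading logic are made up for illustration.

import numpy as np
import sugartensor as tf

# filename source fed by a standard input producer
fname = tf.train.string_input_producer(['a.npy', 'b.npy']).dequeue()

@tf.sg_producer_func
def _load_feature(src):
    # src is the sess.run() result of the source list, i.e. [filename_bytes]
    return [np.load(src[0].decode())]  # hypothetical .npy feature file

# dequeued feature tensor, produced by 2 background threads
feature = _load_feature(source=fname, dtypes=tf.sg_floatx, capacity=32, num_threads=2)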
Example #10
def sg_context(**kwargs):
    global _context
    # set options when enter
    _context = tf.sg_opt(kwargs)
    if _context.name:
        _context.context_name = _context.name
        _context.name = None
        with tf.variable_scope(_context.context_name):
            yield
    else:
        yield
    # clear options when exit
    _context = tf.sg_opt()
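
A small sketch of the context manager in use: layers created inside the block pick up the shared defaults (the layer decorator merges them in) and live under the given variable scope. Names and dimensions are illustrative.

# every layer inside the block inherits act='relu' and bn=True under the 'encoder' scope
with tf.sg_context(name='encoder', act='relu', bn=True):
    h = (x.sg_conv(dim=32, size=3)
          .sg_conv(dim=64, size=3)
          .sg_flatten()
          .sg_dense(dim=128))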
Example #11
def sg_pool1d(tensor, opt):
    # default stride and pad
    opt += tf.sg_opt(stride=2, pad='VALID')
    opt += tf.sg_opt(size=opt.stride)

    if opt.avg:
        out = tf.nn.avg_pool(tensor.sg_expand_dims(dim=2), (1, opt.size, 1, 1),
                             (1, opt.stride, 1, 1), opt.pad)
    else:
        out = tf.nn.max_pool(tensor.sg_expand_dims(dim=2), (1, opt.size, 1, 1),
                             (1, opt.stride, 1, 1), opt.pad)

    return tf.identity(out.sg_squeeze(dim=2), name=opt.name)
Example #12
def _densenet_graph(x, opt, nums):

    # default option
    opt += tf.sg_opt(num_class=1000, k=32, conv_only=False, squeeze=True, act='relu')

    # convolution layers ( dense net arch )
    with tf.sg_context(name=opt.name):
        conv = (x
                .sg_conv(dim=opt.k, size=7, stride=2, bias=False, reuse=opt.reuse, name='conv1')
                .sg_pool(size=3, stride=2, pad='SAME')
                .sg_densenet_layer(dim=opt.k, num=nums[0], act=opt.act, reuse=opt.reuse, name='conv2')
                .sg_densenet_layer(dim=opt.k, num=nums[1], act=opt.act, reuse=opt.reuse, name='conv3')
                .sg_densenet_layer(dim=opt.k, num=nums[2], act=opt.act, reuse=opt.reuse, name='conv4')
                .sg_densenet_layer(dim=opt.k, num=nums[3], act=opt.act, trans=False, reuse=opt.reuse, name='conv5')
                .sg_bypass(act=opt.act, bn=True, reuse=opt.reuse, name='final_act')  # final activation
                .sg_pool(size=7, stride=1, avg=True))  # global average pool

        # fully convolution layers
        fc = conv.sg_conv(dim=opt.num_class, size=1, act='linear', bn=False, reuse=opt.reuse, name='fc')

    # return selectively
    if opt.conv_only:
        return conv
    else:
        if opt.squeeze:
            return fc.sg_squeeze(dim=(1, 2))
        else:
            return fc
Example #13
def sg_aconv(tensor, opt):
    r"""Applies a 2-D atrous (or dilated) convolution.
    
    Args:
      tensor: A 4-D `Tensor` (automatically passed by decorator).
      opt:
        size: A tuple/list of positive integers of length 2 representing `[kernel height, kernel width]`.
          Can be an integer if both values are the same.
          If not specified, (3, 3) is set automatically.
        rate: A positive integer. The stride with which we sample input values across
          the `height` and `width` dimensions. Default is 2.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.
        regularizer:  A (Tensor -> Tensor or None) function; the result of applying it on a newly created variable
          will be added to the collection tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]

    # parameter tf.sg_initializer
    w = tf.sg_initializer.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim),
                                     regularizer=opt.regularizer, summary=opt.summary)
    b = tf.sg_initializer.constant('b', opt.dim, summary=opt.summary) if opt.bias else 0

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate, padding=opt.pad) + b

    return out
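
A minimal call sketch for the atrous convolution above; the 4-D input and the dimensions are illustrative, and `in_dim` is typically filled in by the layer decorator from the input shape.

# images: (batch, height, width, channel)
img = tf.placeholder(tf.sg_floatx, shape=(None, 32, 32, 3))

# 3x3 dilated convolution with rate 2 and 64 output channels
feat = img.sg_aconv(size=3, rate=2, dim=64, act='relu', bn=True)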
Example #14
def sg_emb(**kwargs):
    r"""Returns an embedding layer or a look-up table.
    
    Args:
      name: A name for the layer (required).
      emb: A 2-D array of shape `[vocabulary size - 1, embedding dimension size]`.
        A zero row is prepended to it because index 0 corresponds to padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive int32.
      
    Returns:
      A 2-D tensor.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb
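
A sketch of the embedding table used together with `sg_lookup`, the library's companion lookup op; vocabulary and dimension sizes are illustrative.

# token ids: (batch, time) int32, where id 0 is the padding symbol
ids = tf.placeholder(tf.sg_intx, shape=(16, 50))

# embedding table whose first row stays zero for padding
emb = tf.sg_emb(name='emb', voca_size=1000, dim=128)

# embedded sequence: (16, 50, 128)
x = ids.sg_lookup(emb=emb)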
Example #15
def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.
    
    Args:
      tensor: A `Tensor`.
      size: An `integer` representing `[kernel width]`.
        If not specified, 2 is set implicitly.
      stride: An `integer`. The number of entries by which
        the filter is moved right at each step.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.
      
    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride,
                       padding=opt.pad) + (b if opt.bias else 0)

    return out
Example #16
def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.
    
    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`.
          If not specified, 2 is set implicitly.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.
        regularizer:  A (Tensor -> Tensor or None) function; the result of applying it on a newly created variable
          will be added to the collection tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter tf.sg_initializer
    w = tf.sg_initializer.he_uniform('W', (opt.size, opt.in_dim, opt.dim),
                                     regularizer=opt.regularizer, summary=opt.summary)
    b = tf.sg_initializer.constant('b', opt.dim, summary=opt.summary) if opt.bias else 0

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + b

    return out
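
A chained usage sketch for the 1-D convolution above, assuming a 3-D (batch, time, channel) input; the sizes are arbitrary.

# sequence features: (batch, time, channel)
seq = tf.placeholder(tf.sg_floatx, shape=(16, 100, 20))

# two 1-D convolution layers followed by 1-D max pooling
h = (seq.sg_conv1d(size=3, dim=64, act='relu', bn=True)
        .sg_conv1d(size=3, dim=64, act='relu', bn=True)
        .sg_pool1d(size=2, stride=2))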
Example #17
def sg_aconv(tensor, opt):
    r"""Applies a 2-D atrous (or dilated) convolution.
    
    Args:
      tensor: A 4-D `Tensor`.
      size: A tuple or list of integers of length 2 representing `[kernel height, kernel width]`.
        Can be an int if both values are the same.
        If not specified, (3, 3) is set automatically.
      rate: A positive int32. The stride with which we sample input values across
        the `height` and `width` dimensions. Default is 2.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.
            
    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='SAME')
    opt.size = opt.size if isinstance(opt.size,
                                      (tuple, list)) else [opt.size, opt.size]

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate,
                              padding=opt.pad) + (b if opt.bias else 0)

    return out
Example #18
def sg_ctc(tensor, opt):
    r"""Computes the CTC (Connectionist Temporal Classification) Loss between `tensor` and `target`.

    Args:
      tensor: A 3-D `float Tensor`.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels (dense tensor).
        name: A `string`. A name to display in the TensorBoard web UI.

    Returns:
      A 1-D `Tensor` with the same length in the first dimension as the `tensor`.

    For example,

    ```
    tensor = [[[2., -1., 3.], [3., 1., -2.]], [[1., -1., 2.], [3., 1., -2.]]]
    target = [[2., 1.], [2., 3.]]
    tensor.sg_ctc(target=target) => [ 4.45940781  2.43091154]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1], merge=True)

    # ctc loss
    out = tf.nn.ctc_loss(opt.target.sg_to_sparse(), tensor, opt.seq_len,
                         ctc_merge_repeated=opt.merge, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
Example #19
def sg_emb(**kwargs):
    r"""Returns a look-up table for embedding.
    
    kwargs:
      name: A name for the layer.
      emb: A 2-D array (optional). 
        If None, the resulting tensor should have the shape of 
        `[vocabulary size, embedding dimension size]`.
        Note that its first row is filled with 0's associated with padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive integer. The size of vocabulary.
      
    Returns:
      A 2-D `Tensor` of float32.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = tf.sg_initializer.he_uniform(opt.name,
                                         (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = tf.sg_initializer.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb
Example #20
def ner_accuracy(tensor, opt):
    r"""Returns accuracy of predictions.

    Args:
      tensor: A `Tensor`. Probability distributions or unscaled prediction scores.
      opt:
        target: A `Tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `tensor`. Each value will be 1 if correct else 0. 

    For example,

    ```
    tensor = [[20.1, 18, -4.2], [0.04, 21.1, 31.3]]
    target = [[0, 1]]
    tensor.sg_accuracy(target=target) => [[ 1.  0.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'
    opt += tf.sg_opt(k=1)

    # calc accuracy
    out = tf.identity(tf.equal(tensor.sg_argmax() + 1, tf.cast(opt.target, tf.int64)).sg_float(), name='acc')
    # out = tf.identity(tf.nn.in_top_k(tensor, opt.target, opt.k).sg_float(), name='acc')

    # masking padding
    if opt.mask:
        out += tf.equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    return out
Example #21
def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.
    
    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`.
          If not specified, 2 is set implicitly.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.
      
    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter tf.sg_initializer
    w = tf.sg_initializer.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    b = tf.sg_initializer.constant('b', opt.dim) if opt.bias else 0

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + b

    return out
Example #22
def sg_upconv(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 2, 2, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size,
                                      (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(
        opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(
        opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.dim, opt.in_dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # tedious shape handling for conv2d_transpose
    shape = tensor.get_shape().as_list()
    out_shape = [
        tf.shape(tensor)[0], shape[1] * opt.stride[1],
        shape[2] * opt.stride[2], opt.dim
    ]

    # apply convolution
    out = tf.nn.conv2d_transpose(tensor,
                                 w,
                                 output_shape=tf.pack(out_shape),
                                 strides=opt.stride,
                                 padding=opt.pad) + (b if opt.bias else 0)
    # resetting the shape is needed because conv2d_transpose() erases all shape information.
    out.set_shape([None, out_shape[1], out_shape[2], opt.dim])

    return out
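
A short sketch of the transposed convolution above used as a 2x upsampling layer, e.g. in a decoder; the shapes are illustrative.

# low-resolution feature map: (batch, 7, 7, 64)
z = tf.placeholder(tf.sg_floatx, shape=(32, 7, 7, 64))

# two stride-2 up-convolutions: 7x7 -> 14x14 -> 28x28
recon = (z.sg_upconv(size=3, stride=2, dim=32, act='relu', bn=True)
          .sg_upconv(size=3, stride=2, dim=1, act='sigmoid'))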
Example #23
def sg_quasi_conv1d(tensor, opt):
    opt += tf.sg_opt(is_enc=False)
    # Split into H and H_zfo
    H = tensor[:Hp.bs]
    H_z = tensor[Hp.bs:2 * Hp.bs]
    H_f = tensor[2 * Hp.bs:3 * Hp.bs]
    H_o = tensor[3 * Hp.bs:]
    if opt.is_enc:
        H_z, H_f, H_o = 0, 0, 0

    # Convolution and merging
    with tf.sg_context(act="linear",
                       causal=(not opt.is_enc),
                       bn=opt.is_enc,
                       ln=(not opt.is_enc)):
        Z = H.sg_aconv1d() + H_z  # (16, 300, 320)
        F = H.sg_aconv1d() + H_f  # (16, 300, 320)
        O = H.sg_aconv1d() + H_o  # (16, 300, 320)

    # Activation
    Z = Z.sg_bypass(act="tanh")  # (16, 300, 320)
    F = F.sg_bypass(act="sigmoid")  # (16, 300, 320)
    O = O.sg_bypass(act="sigmoid")  # (16, 300, 320)

    # Masking
    M = tf.sign(tf.abs(H))[:, :, :1]  # (16, 300, 1) float32. 0 or 1
    Z *= M  # broadcasting
    F *= M  # broadcasting
    O *= M  # broadcasting

    # Concat
    ZFO = tf.concat(axis=0, values=[Z, F, O])

    return ZFO  # (16*3, 150, 320)
Example #24
def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    # reduce dimension
    input_ = (tensor
              .sg_bypass(act='relu', bn=(not opt.causal), ln=opt.causal)
              .sg_conv1d(size=1, dim=in_dim // 2, act='relu', bn=(not opt.causal), ln=opt.causal))

    # 1xk conv dilated
    out = input_.sg_aconv1d(size=opt.size,
                            rate=opt.rate,
                            causal=opt.causal,
                            act='relu',
                            bn=(not opt.causal),
                            ln=opt.causal)

    # dimension recover and residual connection
    out = out.sg_conv1d(size=1, dim=in_dim) + tensor

    return out
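
Residual blocks like the one above are usually stacked with exponentially growing dilation rates, ByteNet/WaveNet style. A rough sketch, assuming `sg_res_block` has been registered as a tensor method (the source repositories typically inject it with a helper such as `tf.sg_inject_func`); `x` is an illustrative 3-D sequence tensor.

# dilated residual stack: rates 1, 2, 4, 8, 16 repeated over several blocks
out = x
for _ in range(3):
    for rate in [1, 2, 4, 8, 16]:
        out = out.sg_res_block(size=3, rate=rate, causal=True)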
Example #25
def sg_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target channel num
    channel_target = channel // (opt.factor * opt.factor)
    channel_factor = channel // channel_target

    # intermediate shape for shuffling
    shape_1 = [
        batch, row, col, channel_factor // opt.factor,
        channel_factor // opt.factor
    ]
    shape_2 = [batch, row * opt.factor, col * opt.factor, 1]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel_target):
        out.append(
            (tensor[:, :, :, i * channel_factor:(i + 1) *
                    channel_factor]).sg_reshape(shape=shape_1).sg_transpose(
                        perm=(0, 1, 3, 2, 4)).sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)
Example #26
def sg_aconv1d(tensor, opt):

    # default options
    opt += tf.sg_opt(size=(2 if opt.causal else 3), rate=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (1, opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    if opt.causal:
        # pre-padding for causality
        if opt.pad == 'SAME':
            pad_len = (opt.size - 1) * opt.rate  # padding size
            x = tf.pad(tensor,
                       [[0, 0], [pad_len, 0], [0, 0]]).sg_expand_dims(dim=1)
        else:
            x = tensor.sg_expand_dims(dim=1)
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(x, w, rate=opt.rate,
                                  padding='VALID') + (b if opt.bias else 0)
    else:
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(
            tensor.sg_expand_dims(dim=1), w, rate=opt.rate,
            padding=opt.pad) + (b if opt.bias else 0)
    # reduce dimension
    out = out.sg_squeeze(dim=1)

    return out
Example #27
def sg_res_block(tensor, opt):
    # default rate

    opt += tf.sg_opt(size=3, rate=1, causal=False, is_first=False)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    with tf.sg_context(dev=opt.dev, reuse=opt.reuse_vars):
        #reduce dim
        input_ = (tensor
                  .sg_bypass_gpus(act='leaky_relu', ln=(not opt.is_first), name="relu_" + opt.name)
                  .sg_conv1d_gpus(size=1, dim=in_dim // 2, act='leaky_relu', ln=opt.causal,
                                  name="convi_" + opt.name))

        # 1xk conv dilated
        out = input_.sg_aconv1d_gpus(size=opt.size,
                                     rate=opt.rate,
                                     causal=opt.causal,
                                     act='leaky_relu',
                                     ln=opt.causal,
                                     name="aconv_" + opt.name)

        # dimension recover and residual connection
        out = out.sg_conv1d_gpus(size=1, dim=in_dim,
                                 name="convo_" + opt.name) + tensor

    return out
Example #28
def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False, is_first=False, dout=0)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    with tf.sg_context(name='block_%d_%d' % (opt.block, opt.rate)):
        # reduce dimension
        input_ = (tensor
                  .sg_bypass(act='relu', ln=(not opt.is_first), name='bypass')
                  .sg_conv1d(size=1, dim=in_dim // 2, act='relu', ln=True,
                             regularizer=reg_type, name='conv_in'))

        # 1xk conv dilated
        out = (input_.sg_aconv1d(size=opt.size,
                                 rate=opt.rate,
                                 causal=opt.causal,
                                 act='relu',
                                 ln=True,
                                 regularizer=reg_type,
                                 name='aconv'))

        # dimension recover and residual connection
        out = out.sg_conv1d(
            size=1, dim=in_dim, regularizer=reg_type, name='conv_out') + tensor

        out = out.identity(ln=True, name='layer_norm')

    return out
Example #29
def sg_inverse_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target shape and channel num
    channel_factor = opt.factor * opt.factor

    # intermediate shape for shuffling
    shape_1 = [
        batch, row // opt.factor, col // opt.factor,
        channel_factor // opt.factor, channel_factor // opt.factor
    ]
    shape_2 = [batch, row // opt.factor, col // opt.factor, channel_factor]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel):
        out.append(tensor[:, :, :, i].sg_expand_dims().sg_reshape(
            shape=shape_1).sg_transpose(perm=(0, 1, 3, 2,
                                              4)).sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)
Example #30
def ner_accuracy(tensor, opt):
    r"""Returns accuracy of predictions.

    Args:
      tensor: A `Tensor`. Probability distributions or unscaled prediction scores.
      opt:
        target: A `Tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `tensor`. Each value will be 1 if correct else 0. 

    For example,

    ```
    tensor = [[20.1, 18, -4.2], [0.04, 21.1, 31.3]]
    target = [[0, 1]]
    tensor.sg_accuracy(target=target) => [[ 1.  0.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'
    opt += tf.sg_opt(k=1)

    # calc accuracy
    out = tf.identity(tf.equal(tensor.sg_argmax() + 1,
                               tf.cast(opt.target, tf.int64)).sg_float(),
                      name='acc')
    # out = tf.identity(tf.nn.in_top_k(tensor, opt.target, opt.k).sg_float(), name='acc')

    # masking padding
    if opt.mask:
        out += tf.equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    return out
Example #31
def sg_reverse_seq(tensor, opt):
    r"""Reverses variable length slices.

    Before applying the pure tensorflow function tf.reverse_sequence,
      this function calculates sequence lengths by counting non-zeros.

    For example,
    
    ```
    tensor = [[1, 2, 3, 0, 0], [4, 5, 0, 0, 0]]
    tensor.sg_reverse_seq()
    => [[3 2 1 0 0]
        [5 4 0 0 0]]
    ```
        
    Args:
      tensor: A 2-D `Tensor` (automatically given by chain).
      opt:
        dim: Dimension to reverse. Default is 1.
        name: If provided, it replaces the current tensor's name.

    Returns:
      A `Tensor` with the same shape and type as `tensor`.
    """
    # default sequence dimension
    opt += tf.sg_opt(dim=1)
    seq_len = tf.not_equal(tensor,
                           tf.zeros_like(tensor)).sg_int().sg_sum(dims=opt.dim)
    return tf.reverse_sequence(tensor, seq_len, opt.dim, name=opt.name)
def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False, is_first=False, dout=0)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    with tf.sg_context(name='block_%d_%d' % (opt.block, opt.rate)):
        # reduce dimension
        input_ = (tensor
                  .sg_bypass(act='relu', ln=(not opt.is_first), name='bypass')
                  .sg_conv1d(size=1, dim=in_dim // 2, act='relu', ln=True, regularizer=reg_type, name='conv_in'))

        # 1xk conv dilated
        out = (input_
               .sg_aconv1d(size=opt.size, rate=opt.rate, causal=opt.causal, act='relu', ln=True,
                           regularizer=reg_type, name='aconv'))

        # dimension recover and residual connection
        out = out.sg_conv1d(size=1, dim=in_dim, regularizer=reg_type, name='conv_out') + tensor

        out = out.identity(ln=True, name='layer_norm')

    return out
    def wrapper(**kwargs):
        r""" Manages arguments of `tf.sg_opt`.

        Args:
          **kwargs:
            lr: A Python Scalar (optional). Learning rate. Default is .001.

            save_dir: A string. The root path to which checkpoint and log files are saved.
              Default is `asset/train`.
            max_ep: A positive integer. Maximum number of epochs. Default is 1000.
            ep_size: A positive integer. Total number of batches in an epoch,
              used for proper log display. Default is 1e5.

            save_interval: A Python scalar. The interval of saving checkpoint files.
              By default, for every 600 seconds, a checkpoint file is written.
            log_interval: A Python scalar. The interval of recording logs.
              By default, for every 60 seconds, logging is executed.
            max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
            keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.

            eval_metric: A list of tensors containing the value to evaluate. Default is [].

            tqdm: Boolean. If True (default), progress bars are shown. If False, a series of loss
                values is shown on the console.
        """
        opt = tf.sg_opt(kwargs)

        # default training options
        opt += tf.sg_opt(lr=0.001,
                         save_dir='asset/train',
                         max_ep=1000, ep_size=100000,
                         save_interval=600, log_interval=60,
                         eval_metric=[],
                         max_keep=5, keep_interval=1,
                         tqdm=True)

        # training epoch and loss
        epoch, loss = -1, None

        # checkpoint saver
        saver = tf.train.Saver(max_to_keep=opt.max_keep,
                               keep_checkpoint_every_n_hours=opt.keep_interval)

        # add evaluation summary
        for m in opt.eval_metric:
            tf.sg_summary_metric(m)

        # summary writer
        log_dir = opt.save_dir + '/run-%02d%02d-%02d%02d' % tuple(time.localtime(time.time()))[1:5]
        summary_writer = tf.summary.FileWriter(log_dir)

        # console logging function
        def console_log(sess_):
            if epoch >= 0:
                tf.sg_info('\tEpoch[%03d:gs=%d] - loss = %s' %
                           (epoch, sess_.run(tf.sg_global_step()),
                            ('NA' if loss is None else '%8.6f' % loss)))

        local_init_op = tf.group(tf.sg_phase().assign(True), tf.tables_initializer(), tf.local_variables_initializer())

        # create supervisor
        sv = tf.train.Supervisor(logdir=opt.save_dir,
                                 saver=saver,
                                 save_model_secs=opt.save_interval,
                                 summary_writer=summary_writer,
                                 save_summaries_secs=opt.log_interval,
                                 global_step=tf.sg_global_step(),
                                 local_init_op=local_init_op)

        # create session
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

            # console logging loop
            if not opt.tqdm:
                sv.loop(opt.log_interval, console_log, args=(sess,))

            # get start epoch
            _step = sess.run(tf.sg_global_step())
            ep = _step // opt.ep_size

            best_f1 = 0
            # check if already finished
            if ep <= opt.max_ep:

                # logging
                tf.sg_info('Training started from epoch[%03d]-step[%d].' % (ep, _step))

                # epoch loop
                for ep in range(ep, opt.max_ep + 1):

                    # update epoch info
                    start_step = sess.run(tf.sg_global_step()) % opt.ep_size
                    epoch = ep

                    # create progressbar iterator
                    if opt.tqdm:
                        iterator = tqdm(range(start_step, opt.ep_size), total=opt.ep_size, initial=start_step,
                                        desc='train', ncols=70, unit='b', leave=False)
                    else:
                        iterator = range(start_step, opt.ep_size)

                    # batch loop
                    for _ in iterator:

                        # exit loop
                        if sv.should_stop():
                            break

                        # call train function
                        batch_loss = func(sess, opt)

                        # loss history update
                        if batch_loss is not None and \
                                not np.isnan(batch_loss.all()) and not np.isinf(batch_loss.all()):
                            if loss is None:
                                loss = np.mean(batch_loss)
                            else:
                                loss = loss * 0.9 + np.mean(batch_loss) * 0.1

                    # log epoch information
                    console_log(sess)

                    f1_stat = show_metrics(sv, sess, opt.eval_metric[2], opt.eval_metric[3], ep, opt.val_ep_size,
                                              'val', use_tqdm=True)

                    if f1_stat > best_f1:
                        best_f1 = f1_stat

                        max_model_file = opt.save_dir + max_model_name

                        # save last version
                        saver.save(sess, max_model_file)
                        print("Improved F1 score, max model saved in file: %s" % max_model_file)

                        print('Test metrics:')
                        show_metrics(sv, sess, opt.test_metric[0], opt.test_metric[1], ep, opt.test_ep_size,
                                        'test', use_tqdm=True)

                # save last version
                saver.save(sess, opt.save_dir + '/model.ckpt', global_step=sess.run(tf.sg_global_step()))

                # logging
                tf.sg_info('Training finished at epoch[%d]-step[%d].' % (ep, sess.run(tf.sg_global_step())))
            else:
                tf.sg_info('Training already finished at epoch[%d]-step[%d].' %
                           (ep - 1, sess.run(tf.sg_global_step())))
def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.
    Args:
        loss: A 0-D `Tensor` containing the value to minimize. list of 0-D tensor for Multiple GPU
        kwargs:
          optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
          lr: A Python Scalar (optional). Learning rate. Default is .001.
          beta1: A Python Scalar (optional). Default is .9.
          beta2: A Python Scalar (optional). Default is .99.
          momentum: A Python Scalar for RMSProp optimizer (optional). Default is 0.
          clip_grad_norm: A Python Scalar. The maximum global norm used by `tf.clip_by_global_norm`
            for gradient clipping. It has no default here, so callers should pass it explicitly.
          category: A string or string list. Specifies the variables that should be trained (optional).
            Only if the name of a trainable variable starts with `category`, its value is updated.
            Default is '', which means all trainable variables are updated.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, momentum=0., category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'RMSProp':
        optim = tf.train.RMSPropOptimizer(learning_rate=opt.lr, decay=opt.beta1, momentum=opt.momentum)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    #
    # calc gradient
    #

    # multiple GPUs case
    if isinstance(loss, (tuple, list)):
        gradients = []
        # loop for each GPU tower
        for i, loss_ in enumerate(loss):
            # specify device
            with tf.device('/gpu:%d' % i):
                # give new scope only to operation
                with tf.name_scope('gpu_%d' % i):
                    # add gradient calculation operation for each GPU tower
                    gradients.append(tf.gradients(loss_, var_list))

        # averaging gradient
        gradient = []
        for grad in zip(*gradients):
            gradient.append(tf.add_n(grad) / len(loss))
    # single GPU case
    else:
        gradient = tf.gradients(loss, var_list)

    gradient, _ = tf.clip_by_global_norm(gradient, opt.clip_grad_norm)

    # gradient update op
    with tf.device('/gpu:0'):
        grad_var = [(g, v) for g, v in zip(gradient, var_list)]
        grad_op = optim.apply_gradients(grad_var, global_step=tf.sg_global_step())

    # add summary using last tower value
    for g, v in grad_var:
        # exclude batch normal statics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
            tf.sg_summary_gradient(v, g)

    # extra update ops within category ( for example, batch normal running stat update )
    if isinstance(opt.category, (tuple, list)):
        update_op = []
        for cat in opt.category:
            update_op.extend([t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(cat)])
    else:
        update_op = [t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if t.name.startswith(opt.category)]

    return tf.group(*([grad_op] + update_op))
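
A closing sketch of how this multi-tower variant might be driven: one loss per GPU tower is passed as a list, and `clip_grad_norm` must be supplied because this version does not default it. The per-tower graph builder and input splits are hypothetical.

# one cross-entropy loss per GPU tower (variable sharing between towers is omitted for brevity)
losses = []
for i in range(2):
    with tf.device('/gpu:%d' % i):
        with tf.name_scope('gpu_%d' % i):
            logit = build_tower(xs[i])          # hypothetical per-tower graph builder
            losses.append(logit.sg_ce(target=ys[i]))

# single update op with averaged gradients over both towers
train_op = sg_optim(losses, optim='Adam', lr=1e-4, clip_grad_norm=10., category='')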