def __init__(self, batch_size=128, reshape=False, one_hot=False):

    # load sg_data set
    data_set = input_data.read_data_sets(Mnist._data_dir, reshape=reshape, one_hot=one_hot)

    self.batch_size = batch_size

    # save each sg_data set
    _train = data_set.train
    _valid = data_set.validation
    _test = data_set.test

    # member initialize ( note: all three need to be *called*, `tf.sg_opt()` )
    self.train, self.valid, self.test = tf.sg_opt(), tf.sg_opt(), tf.sg_opt()

    # convert to tensor queue
    self.train.image, self.train.label = \
        _data_to_tensor([_train.images, _train.labels.astype('int32')], batch_size, name='train')
    self.valid.image, self.valid.label = \
        _data_to_tensor([_valid.images, _valid.labels.astype('int32')], batch_size, name='valid')
    self.test.image, self.test.label = \
        _data_to_tensor([_test.images, _test.labels.astype('int32')], batch_size, name='test')

    # calc total batch count
    self.train.num_batch = _train.labels.shape[0] // batch_size
    self.valid.num_batch = _valid.labels.shape[0] // batch_size
    self.test.num_batch = _test.labels.shape[0] // batch_size

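# Usage sketch, assuming sugartensor's conventional `import sugartensor as tf`
# and that this class is exposed as tf.sg_data.Mnist (illustrative, not canonical):
def _mnist_usage_example():
    data = tf.sg_data.Mnist(batch_size=128)      # builds the train/valid/test tensor queues
    x, y = data.train.image, data.train.label    # dequeued mini-batch tensors
    return x, y, data.train.num_batch            # num_batch = full batches per epoch
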
def sg_pool1d(tensor, opt):
    r"""Performs the 1-D pooling on the `tensor`.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`. Default is 2.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step. Default is 2.
        avg: Boolean. If True, average pooling is applied. Otherwise, max pooling.
        name: If provided, replace current tensor's name.

    Returns:
      A tensor
    """
    # default stride and pad
    opt += tf.sg_opt(stride=2, pad='VALID')
    opt += tf.sg_opt(size=opt.stride)

    if opt.avg:
        out = tf.nn.avg_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)
    else:
        out = tf.nn.max_pool(tensor.sg_expand_dims(dim=2),
                             (1, opt.size, 1, 1), (1, opt.stride, 1, 1), opt.pad)

    return tf.identity(out.sg_squeeze(dim=2), name=opt.name)

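# Usage sketch: sg_pool1d is injected as a tensor method by sugartensor's layer
# decorators, so it chains off any 3-D (batch, width, channel) tensor. The shapes
# below are illustrative assumptions.
def _pool1d_usage_example():
    x = tf.placeholder(tf.sg_floatx, (16, 100, 32))
    p_max = x.sg_pool1d(size=2, stride=2)            # max pooling -> (16, 50, 32)
    p_avg = x.sg_pool1d(size=2, stride=2, avg=True)  # average pooling -> (16, 50, 32)
    return p_max, p_avg
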
def sg_optim(loss, **kwargs):
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)

    # get trainable variables
    var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    # calc gradient
    gradient = optim.compute_gradients(loss, var_list=var_list)

    # add summary ( compute_gradients() already returns (gradient, variable) pairs )
    for g, v in gradient:
        tf.sg_summary_gradient(v, g)

    # gradient update op
    return optim.apply_gradients(gradient, global_step=tf.sg_global_step())

def sg_reuse(tensor, **opt):
    opt = tf.sg_opt(opt)
    assert hasattr(tensor, '_sugar'), 'cannot reuse this node.'
    assert opt.input is not None, 'input is mandatory.'

    # get all nodes in this graph
    nodes, prev = [tensor], tensor._sugar.prev
    while prev is not None:
        nodes = [prev] + nodes
        prev = prev._sugar.prev if hasattr(prev, '_sugar') else None

    # create graph again for this input
    out = opt.input
    for node in nodes[1:]:  # exclude head node
        if node._sugar.is_layer:
            fn = tf.sg_layer_func(node._sugar.func)
            if node._sugar.arg.context_name:
                with tf.variable_scope(node._sugar.arg.context_name):
                    out = fn(out, **(node._sugar.arg + tf.sg_opt(name=node._sugar.name, reuse=True)))
            else:
                out = fn(out, **(node._sugar.arg + tf.sg_opt(name=node._sugar.name, reuse=True)))
        else:
            out = node._sugar.func(out, node._sugar.arg)

    return out

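# Usage sketch: sg_reuse rebuilds a previously declared sugar chain on a new input
# while sharing the original weights (reuse=True). The layer choices below are
# illustrative assumptions, not part of the function above.
def _reuse_usage_example(x1, x2):
    out1 = x1.sg_conv1d(size=3, dim=64, act='relu').sg_dense(dim=10)  # original chain
    out2 = out1.sg_reuse(input=x2)  # same layers and weights, applied to x2
    return out1, out2
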
def sg_train(**kwargs):
    r"""Trains the model.

    Args:
      **kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', or 'sgd'.
        loss: A 0-D `Tensor` containing the value to minimize.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        eval_metric: A list of tensors containing the value to evaluate. Default is [].
        early_stop: Boolean. If True (default), the training should stop
          when the following two conditions are met.
          i. Current loss is less than .95 * previous loss.
          ii. Current learning rate is less than 5e-6.
        lr_reset: Boolean. If True, learning rate is set to opt.lr when training restarts.
          Otherwise (default), the value of the stored `_learning_rate` is taken.
        save_dir: A string. The root path to which checkpoint and log files are saved.
          Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.
        ep_size: A positive integer. Number of total batches in an epoch.
          For proper display of log. Default is 1e5.
        save_interval: A Python scalar. The interval of saving checkpoint files.
          By default, for every 600 seconds, a checkpoint file is written.
        log_interval: A Python scalar. The interval of recording logs.
          By default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.
        tqdm: Boolean. If True (default), progress bars are shown.
        console_log: Boolean. If True, a series of loss will be shown
          on the console instead of tensorboard. Default is False.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='')

    # get optimizer
    train_op = sg_optim(opt.loss, optim=opt.optim, lr=_learning_rate,
                        beta1=opt.beta1, beta2=opt.beta2, category=opt.category)

    # define train function
    # noinspection PyUnusedLocal
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([opt.loss, train_op])[0]

    # run train function
    train_func(**opt)

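# Usage sketch (hedged): a minimal end-to-end training call, assuming `logit` and
# `y` are tensors already defined in the graph and sg_ce is sugartensor's
# cross-entropy loss.
def _train_usage_example(logit, y, num_batch):
    loss = logit.sg_ce(target=y)  # per-example cross-entropy loss tensor
    tf.sg_train(loss=loss, lr=0.001, ep_size=num_batch, max_ep=10)  # blocks until training ends
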
def wrapper(tensor, **kwargs):
    # call sugar function
    out = func(tensor, tf.sg_opt(kwargs))

    # save node info for reuse
    out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + sg_get_context(), prev=tensor)

    # inject reuse function
    out.sg_reuse = types.MethodType(sg_reuse, out)

    return out

def classifier_train(**kwargs):
    r"""Trains the model.

    Args:
      **kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
        loss: A 0-D `Tensor` containing the value to minimize.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        clip_grad_norm: A Python Scalar (optional). Default is 10.
        save_dir: A string. The root path to which checkpoint and log files are saved.
          Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.
        ep_size: A positive integer. Number of total batches in an epoch.
          For proper display of log. Default is 1e5.
        save_interval: A Python scalar. The interval of saving checkpoint files.
          By default, for every 600 seconds, a checkpoint file is written.
        log_interval: A Python scalar. The interval of recording logs.
          By default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.
        category: Scope name or list to train.
        eval_metric: A list of tensors containing the value to evaluate. Default is [].
        tqdm: Boolean. If True (default), progress bars are shown.
          If False, a series of loss will be shown on the console.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, category='',
                     ep_size=100000, clip_grad_norm=10)

    # get optimizer ( pass the user-supplied learning rate instead of a hard-coded one )
    train_op = sg_optim(opt.loss, optim=opt.optim, lr=opt.lr,
                        clip_grad_norm=opt.clip_grad_norm,
                        beta1=opt.beta1, beta2=opt.beta2, category=opt.category)

    # for console logging
    loss_ = opt.loss

    # use only first loss when multiple GPU case
    if isinstance(opt.loss, (tuple, list)):
        loss_ = opt.loss[0]

    # define train function
    # noinspection PyUnusedLocal
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([loss_, train_op])[0]

    # run train function
    train_func(**opt)

def wrapper(**kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      **kwargs:
        source: A source queue list to enqueue
        dtypes: Data types of each tensor
        capacity: Queue capacity. Default is 32.
        num_threads: Number of threads. Default is 1.
    """
    # default option
    opt = tf.sg_opt(kwargs) + tf.sg_opt(dtypes=[tf.sg_floatx], capacity=32, num_threads=1)

    # source queue list check
    assert opt.source is not None, 'source is mandatory.'
    if type(opt.source) is not list and type(opt.source) is not tuple:
        opt.source = [opt.source]
    if type(opt.dtypes) is not list and type(opt.dtypes) is not tuple:
        opt.dtypes = [opt.dtypes]
    assert len(opt.source) == len(opt.dtypes), 'Source and dtypes should have same length.'

    # enqueue function
    def enqueue_func(sess, op):
        # read data from source queue
        data = func(sess.run(opt.source))
        # create feeder dict
        feed_dict = {}
        for ph, col in zip(placeholders, data):
            feed_dict[ph] = col
        # run session
        sess.run(op, feed_dict=feed_dict)

    # create placeholder list
    placeholders = []
    for dtype in opt.dtypes:
        placeholders.append(tf.placeholder(dtype=dtype))

    # create FIFO queue
    queue = tf.FIFOQueue(opt.capacity, dtypes=opt.dtypes)

    # enqueue operation
    enqueue_op = queue.enqueue(placeholders)

    # create queue runner
    runner = _FuncQueueRunner(enqueue_func, queue, [enqueue_op] * opt.num_threads)

    # register to global collection
    tf.train.add_queue_runner(runner)

    # return de-queue operation
    return queue.dequeue()

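# Usage sketch (hedged): the wrapper above is the body of a producer-function
# decorator (sg_producer_func in sugartensor). Assuming that decorator name, an
# arbitrary numpy-side preprocessing step becomes a background dequeue tensor:
@tf.sg_producer_func
def _normalize_example(srcs):
    # receives the evaluated source list; must return one item per placeholder
    return [s / 255.0 for s in srcs]

def _producer_usage_example(source_tensor):
    # feeds `source_tensor` items through _normalize_example via a FIFO queue
    return _normalize_example(source=source_tensor, dtypes=[tf.sg_floatx], capacity=64)
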
@contextlib.contextmanager  # generator used as a `with` block; assumes `import contextlib`
def sg_context(**kwargs):
    global _context

    # set options when enter
    _context = tf.sg_opt(kwargs)
    if _context.name:
        _context.context_name = _context.name
        _context.name = None
        with tf.variable_scope(_context.context_name):
            yield
    else:
        yield

    # clear options when exit
    _context = tf.sg_opt()

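# Usage sketch: sg_context sets default options for every sugar layer declared
# inside the block. Layer choices and option values below are illustrative.
def _context_usage_example(x):
    with tf.sg_context(name='encoder', act='relu', bn=True):
        h = x.sg_conv(dim=32).sg_conv(dim=64)  # both convs inherit act='relu', bn=True
    return h
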
def _densenet_graph(x, opt, nums):
    # default option
    opt += tf.sg_opt(num_class=1000, k=32, conv_only=False, squeeze=True, act='relu')

    # convolution layers ( dense net arch )
    with tf.sg_context(name=opt.name):
        conv = (x
                .sg_conv(dim=opt.k, size=7, stride=2, bias=False, reuse=opt.reuse, name='conv1')
                .sg_pool(size=3, stride=2, pad='SAME')
                .sg_densenet_layer(dim=opt.k, num=nums[0], act=opt.act, reuse=opt.reuse, name='conv2')
                .sg_densenet_layer(dim=opt.k, num=nums[1], act=opt.act, reuse=opt.reuse, name='conv3')
                .sg_densenet_layer(dim=opt.k, num=nums[2], act=opt.act, reuse=opt.reuse, name='conv4')
                .sg_densenet_layer(dim=opt.k, num=nums[3], act=opt.act, trans=False,
                                   reuse=opt.reuse, name='conv5')
                .sg_bypass(act=opt.act, bn=True, reuse=opt.reuse, name='final_act')  # final activation
                .sg_pool(size=7, stride=1, avg=True))  # global average pool

        # fully convolution layers
        fc = conv.sg_conv(dim=opt.num_class, size=1, act='linear', bn=False, reuse=opt.reuse, name='fc')

    # return selectively
    if opt.conv_only:
        return conv
    else:
        if opt.squeeze:
            return fc.sg_squeeze(dim=(1, 2))
        else:
            return fc

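# Usage sketch (hedged): _densenet_graph is an internal builder; a public wrapper
# would presumably pass the per-block layer counts, e.g. DenseNet-121's (6, 12, 24, 16).
# The wrapper name below is an illustrative assumption.
def _densenet_121_example(x, opt):
    return _densenet_graph(x, opt + tf.sg_opt(name='densenet_121'), nums=[6, 12, 24, 16])
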
def sg_aconv(tensor, opt):
    r"""Applies a 2-D atrous (or dilated) convolution.

    Args:
      tensor: A 4-D `Tensor` (automatically passed by decorator).
      opt:
        size: A tuple/list of positive integers of length 2 representing
          `[kernel height, kernel width]`. Can be an integer if both values are the same.
          If not specified, (3, 3) is set automatically.
        rate: A positive integer. The stride with which we sample input values across
          the `height` and `width` dimensions. Default is 2.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.
        regularizer: A (Tensor -> Tensor or None) function; the result of applying it
          on a newly created variable will be added to the collection
          tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]

    # parameter initialize
    w = tf.sg_initializer.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim),
                                     regularizer=opt.regularizer, summary=opt.summary)
    b = tf.sg_initializer.constant('b', opt.dim, summary=opt.summary) if opt.bias else 0

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate, padding=opt.pad) + b

    return out

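# Usage sketch: a dilated 2-D convolution on a (batch, height, width, channel)
# image tensor; in_dim is normally inferred by the layer decorator, so only the
# output dim is given. Option values here are illustrative.
def _aconv_usage_example(image):
    return image.sg_aconv(size=3, rate=2, dim=64, act='relu', bn=True)
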
def sg_emb(**kwargs):
    r"""Returns an embedding layer or a look-up table.

    Args:
      name: A name for the layer (required).
      emb: A 2-D array. Has the shape of `[vocabulary size - 1, embedding dimension size]`.
        Note that the first row is filled with 0's because they correspond to padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive int32.

    Returns:
      A 2-D tensor.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb

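# Usage sketch (hedged): build the table once, then look tokens up with
# sugartensor's sg_lookup; the vocabulary/dimension sizes are illustrative.
def _emb_usage_example(token_ids):
    emb = tf.sg_emb(name='emb', voca_size=10000, dim=128)  # (10000, 128) table, row 0 = padding
    return token_ids.sg_lookup(emb=emb)                    # (batch, time) int -> (batch, time, 128)
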
def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.

    Args:
      tensor: A `Tensor`.
      size: An `integer` representing `[kernel width]`. If not specified, 2 is set implicitly.
      stride: An `integer`. The number of entries by which the filter is moved right at each step.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    return out

def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`.
          If not specified, 2 is set implicitly.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.
        regularizer: A (Tensor -> Tensor or None) function; the result of applying it
          on a newly created variable will be added to the collection
          tf.GraphKeys.REGULARIZATION_LOSSES and can be used for regularization.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = tf.sg_initializer.he_uniform('W', (opt.size, opt.in_dim, opt.dim),
                                     regularizer=opt.regularizer, summary=opt.summary)
    b = tf.sg_initializer.constant('b', opt.dim, summary=opt.summary) if opt.bias else 0

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + b

    return out

def sg_aconv(tensor, opt):
    r"""Applies a 2-D atrous (or dilated) convolution.

    Args:
      tensor: A 4-D `Tensor`.
      size: A tuple or list of integers of length 2 representing
        `[kernel height, kernel width]`. Can be an int if both values are the same.
        If not specified, (3, 3) is set automatically.
      rate: A positive int32. The stride with which we sample input values across
        the `height` and `width` dimensions. Default is 2.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    return out

def sg_ctc(tensor, opt):
    r"""Computes the CTC (Connectionist Temporal Classification) Loss between `tensor` and `target`.

    Args:
      tensor: A 3-D `float Tensor`.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`.
          Labels. ( Dense tensor )
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A 1-D `Tensor` with the same length in the first dimension of the `tensor`.

    For example,

    ```
    tensor = [[[2., -1., 3.], [3., 1., -2.]], [[1., -1., 2.], [3., 1., -2.]]]
    target = [[2., 1.], [2., 3.]]
    tensor.sg_ctc(target=target) => [ 4.45940781  2.43091154]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1], merge=True)

    # ctc loss
    out = tf.nn.ctc_loss(opt.target.sg_to_sparse(), tensor, opt.seq_len,
                         ctc_merge_repeated=opt.merge, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out

def sg_emb(**kwargs):
    r"""Returns a look-up table for embedding.

    kwargs:
      name: A name for the layer.
      emb: A 2-D array (optional).
        If None, the resulting tensor should have the shape of
        `[vocabulary size, embedding dimension size]`.
        Note that its first row is filled with 0's associated with padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive integer. The size of vocabulary.

    Returns:
      A 2-D `Tensor` of float32.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = tf.sg_initializer.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = tf.sg_initializer.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb

def ner_accuracy(tensor, opt):
    r"""Returns accuracy of predictions.

    Args:
      tensor: A `Tensor`. Probability distributions or unscaled prediction scores.
      opt:
        target: A `Tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `tensor`. Each value will be 1 if correct else 0.

    For example,

    ```
    tensor = [[20.1, 18, -4.2], [0.04, 21.1, 31.3]]
    target = [[0, 1]]
    tensor.sg_accuracy(target=target) => [[ 1.  0.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'
    opt += tf.sg_opt(k=1)

    # calc accuracy
    out = tf.identity(tf.equal(tensor.sg_argmax() + 1, tf.cast(opt.target, tf.int64)).sg_float(), name='acc')
    # out = tf.identity(tf.nn.in_top_k(tensor, opt.target, opt.k).sg_float(), name='acc')

    # masking padding
    if opt.mask:
        out += tf.equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    return out

def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        size: A positive `integer` representing `[kernel width]`.
          If not specified, 2 is set implicitly.
        stride: A positive `integer`. The number of entries by which
          the filter is moved right at each step.
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        pad: Either `SAME` (Default) or `VALID`.
        bias: Boolean. If True, biases are added.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = tf.sg_initializer.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    b = tf.sg_initializer.constant('b', opt.dim) if opt.bias else 0

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + b

    return out

def sg_upconv(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 2, 2, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.dim, opt.in_dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # tedious shape handling for conv2d_transpose
    shape = tensor.get_shape().as_list()
    out_shape = [tf.shape(tensor)[0], shape[1] * opt.stride[1], shape[2] * opt.stride[2], opt.dim]

    # apply convolution
    out = tf.nn.conv2d_transpose(tensor, w, output_shape=tf.pack(out_shape),
                                 strides=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    # reset shape is needed because conv2d_transpose() erases all shape information.
    out.set_shape([None, out_shape[1], out_shape[2], opt.dim])

    return out

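# Usage sketch: a transposed convolution that doubles spatial resolution with the
# default stride of 2 in height and width. Kernel size and dim are illustrative.
def _upconv_usage_example(feat):
    return feat.sg_upconv(size=4, dim=32)  # (b, h, w, c) -> (b, 2h, 2w, 32)
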
def sg_quasi_conv1d(tensor, opt):
    opt += tf.sg_opt(is_enc=False)

    # Split into H and H_zfo
    H = tensor[:Hp.bs]
    H_z = tensor[Hp.bs:2 * Hp.bs]
    H_f = tensor[2 * Hp.bs:3 * Hp.bs]
    H_o = tensor[3 * Hp.bs:]
    if opt.is_enc:
        H_z, H_f, H_o = 0, 0, 0

    # Convolution and merging
    with tf.sg_context(act="linear", causal=(not opt.is_enc), bn=opt.is_enc, ln=(not opt.is_enc)):
        Z = H.sg_aconv1d() + H_z  # (16, 300, 320)
        F = H.sg_aconv1d() + H_f  # (16, 300, 320)
        O = H.sg_aconv1d() + H_o  # (16, 300, 320)

    # Activation
    Z = Z.sg_bypass(act="tanh")     # (16, 300, 320)
    F = F.sg_bypass(act="sigmoid")  # (16, 300, 320)
    O = O.sg_bypass(act="sigmoid")  # (16, 300, 320)

    # Masking
    M = tf.sign(tf.abs(H))[:, :, :1]  # (16, 300, 1) float32. 0 or 1
    Z *= M  # broadcasting
    F *= M  # broadcasting
    O *= M  # broadcasting

    # Concat along the batch axis
    ZFO = tf.concat(axis=0, values=[Z, F, O])

    return ZFO  # (16*3, 300, 320)

def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    # reduce dimension ( integer division keeps the dim argument an int )
    input_ = (tensor
              .sg_bypass(act='relu', bn=(not opt.causal), ln=opt.causal)
              .sg_conv1d(size=1, dim=in_dim // 2, act='relu', bn=(not opt.causal), ln=opt.causal))

    # 1xk conv dilated
    out = input_.sg_aconv1d(size=opt.size, rate=opt.rate, causal=opt.causal,
                            act='relu', bn=(not opt.causal), ln=opt.causal)

    # dimension recover and residual connection
    out = out.sg_conv1d(size=1, dim=in_dim) + tensor

    return out

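# Usage sketch (hedged): this block is typically stacked ByteNet-style with
# exponentially growing dilation rates, assuming it is registered as a sugar
# function so it chains as a method. Stack depth and rates are illustrative.
def _res_block_usage_example(x):
    out = x
    for _ in range(3):                 # 3 stacks
        for rate in (1, 2, 4, 8, 16):  # receptive field grows exponentially
            out = out.sg_res_block(size=3, rate=rate, causal=True)
    return out
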
def sg_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target channel num ( integer division so the shapes stay ints )
    channel_target = channel // (opt.factor * opt.factor)
    channel_factor = channel // channel_target

    # intermediate shape for shuffling
    shape_1 = [batch, row, col, channel_factor // opt.factor, channel_factor // opt.factor]
    shape_2 = [batch, row * opt.factor, col * opt.factor, 1]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel_target):
        out.append((tensor[:, :, :, i * channel_factor:(i + 1) * channel_factor])
                   .sg_reshape(shape=shape_1)
                   .sg_transpose(perm=(0, 1, 3, 2, 4))
                   .sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)

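# Usage sketch: sub-pixel ("periodic shuffle") upscaling as in ESPCN, trading
# channels for spatial resolution; sg_inverse_periodic_shuffle (below) is the
# inverse operation. The factor and channel counts are illustrative.
def _shuffle_usage_example(low_res):
    # conv to factor**2 * 3 channels, then rearrange into a 2x larger RGB image
    return low_res.sg_conv(size=3, dim=3 * 2 * 2).sg_periodic_shuffle(factor=2)
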
def sg_aconv1d(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(2 if opt.causal else 3), rate=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (1, opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    if opt.causal:
        # pre-padding for causality
        if opt.pad == 'SAME':
            pad_len = (opt.size - 1) * opt.rate  # padding size
            x = tf.pad(tensor, [[0, 0], [pad_len, 0], [0, 0]]).sg_expand_dims(dim=1)
        else:
            x = tensor.sg_expand_dims(dim=1)
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(x, w, rate=opt.rate, padding='VALID') + (b if opt.bias else 0)
    else:
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(tensor.sg_expand_dims(dim=1), w,
                                  rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    # reduce dimension
    out = out.sg_squeeze(dim=1)

    return out

def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False, is_first=False)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    with tf.sg_context(dev=opt.dev, reuse=opt.reuse_vars):

        # reduce dimension
        input_ = (tensor
                  .sg_bypass_gpus(act='leaky_relu', ln=(not opt.is_first), name="relu_" + opt.name)
                  .sg_conv1d_gpus(size=1, dim=in_dim // 2, act='leaky_relu', ln=opt.causal,
                                  name="convi_" + opt.name))

        # 1xk conv dilated
        out = input_.sg_aconv1d_gpus(size=opt.size, rate=opt.rate, causal=opt.causal,
                                     act='leaky_relu', ln=opt.causal, name="aconv_" + opt.name)

        # dimension recover and residual connection
        out = out.sg_conv1d_gpus(size=1, dim=in_dim, name="convo_" + opt.name) + tensor

    return out

def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False, is_first=False, dout=0)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    with tf.sg_context(name='block_%d_%d' % (opt.block, opt.rate)):

        # reduce dimension
        input_ = (tensor
                  .sg_bypass(act='relu', ln=(not opt.is_first), name='bypass')
                  .sg_conv1d(size=1, dim=in_dim // 2, act='relu', ln=True,
                             regularizer=reg_type, name='conv_in'))

        # 1xk conv dilated
        out = (input_
               .sg_aconv1d(size=opt.size, rate=opt.rate, causal=opt.causal,
                           act='relu', ln=True, regularizer=reg_type, name='aconv'))

        # dimension recover and residual connection
        out = out.sg_conv1d(size=1, dim=in_dim, regularizer=reg_type, name='conv_out') + tensor
        out = out.identity(ln=True, name='layer_norm')

    return out

def sg_inverse_periodic_shuffle(tensor, opt):
    # default factor
    opt += tf.sg_opt(factor=2)

    # get current shape
    batch, row, col, channel = tensor.get_shape().as_list()

    # get target shape and channel num
    channel_factor = opt.factor * opt.factor

    # intermediate shape for shuffling ( integer division so the shapes stay ints )
    shape_1 = [batch, row // opt.factor, col // opt.factor,
               channel_factor // opt.factor, channel_factor // opt.factor]
    shape_2 = [batch, row // opt.factor, col // opt.factor, channel_factor]

    # reshape and transpose for periodic shuffling for each channel
    out = []
    for i in range(channel):
        out.append(tensor[:, :, :, i]
                   .sg_expand_dims()
                   .sg_reshape(shape=shape_1)
                   .sg_transpose(perm=(0, 1, 3, 2, 4))
                   .sg_reshape(shape=shape_2))

    # final output
    out = tf.concat(3, out)

    return tf.identity(out, name=opt.name)

def sg_reverse_seq(tensor, opt):
    r"""Reverses variable length slices.

    Before applying the pure tensorflow function tf.reverse_sequence,
    this function calculates sequence lengths by counting non-zeros.

    For example,

    ```
    tensor = [[1, 2, 3, 0, 0], [4, 5, 0, 0, 0]]
    tensor.sg_reverse_seq()
    => [[3 2 1 0 0]
        [5 4 0 0 0]]
    ```

    Args:
      tensor: A 2-D `Tensor` (automatically given by chain).
      opt:
        dim: Dimension to reverse. Default is 1.
        name: If provided, it replaces current tensor's name.

    Returns:
      A `Tensor` with the same shape and type as `tensor`.
    """
    # default sequence dimension
    opt += tf.sg_opt(dim=1)
    seq_len = tf.not_equal(tensor, tf.zeros_like(tensor)).sg_int().sg_sum(dims=opt.dim)

    return tf.reverse_sequence(tensor, seq_len, opt.dim, name=opt.name)

def wrapper(**kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
      **kwargs:
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        save_dir: A string. The root path to which checkpoint and log files are saved.
          Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.
        ep_size: A positive integer. Number of total batches in an epoch.
          For proper display of log. Default is 1e5.
        save_interval: A Python scalar. The interval of saving checkpoint files.
          By default, for every 600 seconds, a checkpoint file is written.
        log_interval: A Python scalar. The interval of recording logs.
          By default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints. Default is 1 hour.
        eval_metric: A list of tensors containing the value to evaluate. Default is [].
        tqdm: Boolean. If True (default), progress bars are shown.
          If False, a series of loss will be shown on the console.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(lr=0.001,
                     save_dir='asset/train',
                     max_ep=1000, ep_size=100000,
                     save_interval=600, log_interval=60,
                     eval_metric=[],
                     max_keep=5, keep_interval=1,
                     tqdm=True)

    # training epoch and loss
    epoch, loss = -1, None

    # checkpoint saver
    saver = tf.train.Saver(max_to_keep=opt.max_keep,
                           keep_checkpoint_every_n_hours=opt.keep_interval)

    # add evaluation summary
    for m in opt.eval_metric:
        tf.sg_summary_metric(m)

    # summary writer ( assumes `import time` at module level )
    log_dir = opt.save_dir + '/run-%02d%02d-%02d%02d' % tuple(time.localtime(time.time()))[1:5]
    summary_writer = tf.summary.FileWriter(log_dir)

    # console logging function
    def console_log(sess_):
        if epoch >= 0:
            tf.sg_info('\tEpoch[%03d:gs=%d] - loss = %s' %
                       (epoch, sess_.run(tf.sg_global_step()),
                        ('NA' if loss is None else '%8.6f' % loss)))

    local_init_op = tf.group(tf.sg_phase().assign(True),
                             tf.tables_initializer(),
                             tf.local_variables_initializer())

    # create supervisor
    sv = tf.train.Supervisor(logdir=opt.save_dir,
                             saver=saver,
                             save_model_secs=opt.save_interval,
                             summary_writer=summary_writer,
                             save_summaries_secs=opt.log_interval,
                             global_step=tf.sg_global_step(),
                             local_init_op=local_init_op)

    # create session
    with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

        # console logging loop
        if not opt.tqdm:
            sv.loop(opt.log_interval, console_log, args=(sess,))

        # get start epoch
        _step = sess.run(tf.sg_global_step())
        ep = _step // opt.ep_size

        best_f1 = 0

        # check if already finished
        if ep <= opt.max_ep:

            # logging
            tf.sg_info('Training started from epoch[%03d]-step[%d].' % (ep, _step))

            # epoch loop
            for ep in range(ep, opt.max_ep + 1):

                # update epoch info
                start_step = sess.run(tf.sg_global_step()) % opt.ep_size
                epoch = ep

                # create progressbar iterator ( assumes `from tqdm import tqdm` )
                if opt.tqdm:
                    iterator = tqdm(range(start_step, opt.ep_size), total=opt.ep_size,
                                    initial=start_step, desc='train', ncols=70,
                                    unit='b', leave=False)
                else:
                    iterator = range(start_step, opt.ep_size)

                # batch loop
                for _ in iterator:

                    # exit loop
                    if sv.should_stop():
                        break

                    # call train function
                    batch_loss = func(sess, opt)

                    # loss history update
                    if batch_loss is not None and \
                            not np.isnan(batch_loss.all()) and not np.isinf(batch_loss.all()):
                        if loss is None:
                            loss = np.mean(batch_loss)
                        else:
                            loss = loss * 0.9 + np.mean(batch_loss) * 0.1

                # log epoch information
                console_log(sess)

                f1_stat = show_metrics(sv, sess, opt.eval_metric[2], opt.eval_metric[3],
                                       ep, opt.val_ep_size, 'val', use_tqdm=True)

                if f1_stat > best_f1:
                    best_f1 = f1_stat

                    # save best version ( `max_model_name` is assumed to be a module-level constant )
                    max_model_file = opt.save_dir + max_model_name
                    saver.save(sess, max_model_file)
                    print("Improved F1 score, max model saved in file: %s" % max_model_file)

                    print('Test metrics:')
                    show_metrics(sv, sess, opt.test_metric[0], opt.test_metric[1],
                                 ep, opt.test_ep_size, 'test', use_tqdm=True)

            # save last version
            saver.save(sess, opt.save_dir + '/model.ckpt', global_step=sess.run(tf.sg_global_step()))

            # logging
            tf.sg_info('Training finished at epoch[%d]-step[%d].' % (ep, sess.run(tf.sg_global_step())))
        else:
            tf.sg_info('Training already finished at epoch[%d]-step[%d].' %
                       (ep - 1, sess.run(tf.sg_global_step())))

def sg_optim(loss, **kwargs):
    r"""Applies gradients to variables.

    Args:
      loss: A 0-D `Tensor` containing the value to minimize.
        A list of 0-D tensors for the multiple-GPU case.
      kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam', 'RMSProp' or 'sgd'.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        momentum: A Python Scalar for RMSProp optimizer (optional). Default is 0.
        category: A string or string list. Specifies the variables that should be trained (optional).
          Only if the name of a trainable variable starts with `category`, its value is updated.
          Default is '', which means all trainable variables are updated.
    """
    opt = tf.sg_opt(kwargs)

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99, momentum=0., category='')

    # select optimizer
    if opt.optim == 'MaxProp':
        optim = tf.sg_optimize.MaxPropOptimizer(learning_rate=opt.lr, beta2=opt.beta2)
    elif opt.optim == 'AdaMax':
        optim = tf.sg_optimize.AdaMaxOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'Adam':
        optim = tf.train.AdamOptimizer(learning_rate=opt.lr, beta1=opt.beta1, beta2=opt.beta2)
    elif opt.optim == 'RMSProp':
        optim = tf.train.RMSPropOptimizer(learning_rate=opt.lr, decay=opt.beta1, momentum=opt.momentum)
    else:
        optim = tf.train.GradientDescentOptimizer(learning_rate=opt.lr)

    # get trainable variables
    if isinstance(opt.category, (tuple, list)):
        var_list = []
        for cat in opt.category:
            var_list.extend([t for t in tf.trainable_variables() if t.name.startswith(cat)])
    else:
        var_list = [t for t in tf.trainable_variables() if t.name.startswith(opt.category)]

    #
    # calc gradient
    #

    # multiple GPUs case
    if isinstance(loss, (tuple, list)):
        gradients = []
        # loop for each GPU tower
        for i, loss_ in enumerate(loss):
            # specify device
            with tf.device('/gpu:%d' % i):
                # give new scope only to operation
                with tf.name_scope('gpu_%d' % i):
                    # add gradient calculation operation for each GPU tower
                    gradients.append(tf.gradients(loss_, var_list))

        # averaging gradient
        gradient = []
        for grad in zip(*gradients):
            gradient.append(tf.add_n(grad) / len(loss))
    # single GPU case
    else:
        gradient = tf.gradients(loss, var_list)
        # clip gradient norm only when the option is given
        # ( a None norm would break tf.clip_by_global_norm )
        if opt.clip_grad_norm:
            gradient, _ = tf.clip_by_global_norm(gradient, opt.clip_grad_norm)

    # gradient update op
    with tf.device('/gpu:0'):
        grad_var = [(g, v) for g, v in zip(gradient, var_list)]
        grad_op = optim.apply_gradients(grad_var, global_step=tf.sg_global_step())

    # add summary using last tower value
    for g, v in grad_var:
        # exclude batch normal statics
        if 'mean' not in v.name and 'variance' not in v.name \
                and 'beta' not in v.name and 'gamma' not in v.name:
            tf.sg_summary_gradient(v, g)

    # extra update ops within category ( for example, batch normal running stat update )
    if isinstance(opt.category, (tuple, list)):
        update_op = []
        for cat in opt.category:
            update_op.extend([t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                              if t.name.startswith(cat)])
    else:
        update_op = [t for t in tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                     if t.name.startswith(opt.category)]

    return tf.group(*([grad_op] + update_op))

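# Usage sketch (hedged): building the update op directly with sg_optim, training
# only variables whose names start with 'generator', as the category option
# describes. Scope name and hyper-parameters are illustrative.
def _optim_usage_example(loss):
    return sg_optim(loss, optim='Adam', lr=1e-4, clip_grad_norm=10, category='generator')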