def sg_rnn(tensor, opt):
    r"""Applies a simple RNN.

    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, only the outputs in the last time step are returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will be
      [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out

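# A minimal NumPy sketch (illustrative only, not part of the library) of the recurrence
# implemented above, with layer normalization omitted: h_t = x_t @ W + h_{t-1} @ U + b.
# The shapes and parameter names below are assumptions chosen for the demonstration.
import numpy as np

batch, steps, in_dim, dim = 2, 5, 3, 4
x = np.random.randn(batch, steps, in_dim)
W = np.random.randn(in_dim, dim)
U = np.eye(dim)                          # identity init, like init.identity('U', ...)
b = np.zeros(dim)

h, outputs = np.zeros((batch, dim)), []  # zero initial state, as when init_state is None
for t in range(steps):
    h = x[:, t, :] @ W + h @ U + b       # one simple-rnn step
    outputs.append(h[:, None, :])

full = np.concatenate(outputs, axis=1)   # last_only=False -> (batch, steps, dim)
last = outputs[-1][:, 0, :]              # last_only=True  -> (batch, dim)
print(full.shape, last.shape)            # (2, 5, 4) (2, 4)
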
def sg_rnn(tensor, opt):
    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):
        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])
                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                    / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
                # apply offset, scale parameter
                return gamma * xx + beta
            else:
                return xx

        # apply transform
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0), opt)
        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out

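# A small self-contained sketch (plain Python; illustrative only) of the fold/scan
# pattern used by tf.scan(step, xx, init_h) above: the previous output is fed back
# as the first argument of step, and every intermediate result is kept along time.
def scan(step, xs, init):
    h, out = init, []
    for x in xs:        # xs plays the role of the time-major tensor xx
        h = step(h, x)  # previous state in, new state out
        out.append(h)
    return out

print(scan(lambda h, x: h + x, [1, 2, 3, 4], 0))  # [1, 3, 6, 10]
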
def sg_upconv(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 2, 2, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.dim, opt.in_dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # tedious shape handling for conv2d_transpose
    shape = tensor.get_shape().as_list()
    out_shape = [tf.shape(tensor)[0], shape[1] * opt.stride[1], shape[2] * opt.stride[2], opt.dim]

    # apply convolution
    out = tf.nn.conv2d_transpose(tensor, w, output_shape=tf.pack(out_shape),
                                 strides=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    # resetting the shape is needed because conv2d_transpose() erases all shape information
    out.set_shape([None, out_shape[1], out_shape[2], opt.dim])

    return out

def sg_aconv1d(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(2 if opt.causal else 3), rate=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (1, opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    if opt.causal:
        # pre-padding for causality
        if opt.pad == 'SAME':
            pad_len = (opt.size - 1) * opt.rate  # padding size
            x = tf.pad(tensor, [[0, 0], [pad_len, 0], [0, 0]]).sg_expand_dims(dim=1)
        else:
            x = tensor.sg_expand_dims(dim=1)
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(x, w, rate=opt.rate, padding='VALID') + (b if opt.bias else 0)
    else:
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(tensor.sg_expand_dims(dim=1), w,
                                  rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    # reduce dimension
    out = out.sg_squeeze(dim=1)

    return out

def sg_aconv(tensor, opt):
    r"""Applies a 2-D atrous (or dilated) convolution.

    Args:
      tensor: A 4-D `Tensor`.
      size: A tuple or list of integers of length 2 representing
        `[kernel height, kernel width]`. Can be an int if both values are the same.
        If not specified, (3, 3) is set automatically.
      rate: A positive int32. The stride with which we sample input values across
        the `height` and `width` dimensions. Default is 2.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    return out

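# Illustrative arithmetic (not library code): with dilation rate r, a k x k atrous
# kernel covers an effective window of k + (k - 1) * (r - 1) input positions per axis,
# so rate=2 with the default 3x3 kernel spans a 5x5 region without extra parameters.
def effective_size(k, r):
    return k + (k - 1) * (r - 1)

print(effective_size(3, 2))  # 5
print(effective_size(3, 4))  # 9
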
def sg_conv1d(tensor, opt):
    r"""Applies a 1-D convolution.

    Args:
      tensor: A 3-D `Tensor`.
      size: An `integer` representing the kernel width. If not specified, 2 is set implicitly.
      stride: An `integer`. The number of entries by which the filter is moved right at each step.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    return out

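# A hedged sketch (plain Python) of the standard TensorFlow padding arithmetic that
# determines the output length of the convolution above; `n` and the sizes below are
# illustrative values, not values taken from the library.
import math

def out_len(n, size, stride, pad):
    if pad == 'SAME':
        return math.ceil(n / stride)
    else:  # 'VALID'
        return math.ceil((n - size + 1) / stride)

print(out_len(10, 2, 1, 'SAME'))   # 10 -> time length preserved
print(out_len(10, 2, 1, 'VALID'))  # 9
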
def sg_rnn(tensor, opt):
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out

def sg_dense(tensor, opt):
    # parameter initialize
    w = init.he_uniform('W', (opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply transform
    out = tf.matmul(tensor, w) + (b if opt.bias else 0)

    return out

def sg_upconv(tensor, opt):
    r"""Applies an upconvolution (or transposed convolution).

    Args:
      tensor: A 4-D `Tensor`.
      size: A tuple or list of integers of length 2 representing
        `[kernel height, kernel width]`. Can be an int if both values are the same.
        If not specified, (3, 3) is set implicitly.
      stride: A tuple or list of integers of length 2 or 4 representing stride dimensions.
        If the length is 2, i.e., (a, b), the stride is `[1, a, b, 1]`.
        If the length is 4, i.e., (a, b, c, d), the stride is `[a, b, c, d]`.
        Can also be an int, i.e., a, in which case the stride is `[1, a, a, 1]`.
        The default value is [1, 2, 2, 1].
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. If True, biases are added.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 2, 2, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.dim, opt.in_dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # tedious shape handling for conv2d_transpose
    shape = tensor.get_shape().as_list()
    out_shape = [tf.shape(tensor)[0], shape[1] * opt.stride[1], shape[2] * opt.stride[2], opt.dim]

    # apply convolution
    out = tf.nn.conv2d_transpose(tensor, w, output_shape=tf.pack(out_shape),
                                 strides=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    # resetting the shape is needed because conv2d_transpose() erases all shape information
    out.set_shape([None, out_shape[1], out_shape[2], opt.dim])

    return out

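# An illustrative check (plain Python, not library code) of the output-shape handling
# above: with 'SAME' padding, conv2d_transpose scales height and width by the stride,
# which is exactly what out_shape encodes. The numbers below are assumptions.
def upconv_shape(batch, h, w, stride, dim):
    return [batch, h * stride[1], w * stride[2], dim]

print(upconv_shape(8, 14, 14, [1, 2, 2, 1], 64))  # [8, 28, 28, 64]
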
def sg_conv1d(tensor, opt):
    # default options
    opt += tf.sg_opt(size=2, stride=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv1d(tensor, w, stride=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    return out

def sg_aconv(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(3, 3), rate=2, pad='VALID')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.atrous_conv2d(tensor, w, rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    return out

def sg_conv(tensor, opt):
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 1, 1, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv2d(tensor, w, strides=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    return out

def sg_aconv1d(tensor, opt):
    r"""Applies a 1-D atrous (or dilated) convolution.

    Args:
      tensor: A 3-D `Tensor`.
      causal: Boolean. If True, zeros are padded before the time axis so that each
        activation unit doesn't have receptive neurons beyond the equivalent time step.
      size: An `integer` representing the kernel width. By default it is set to 2
        if causal is True and 3 otherwise.
      rate: A positive int32. The stride with which we sample input values along
        the time axis. Default is 1.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. Whether to add biases to the filters.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(2 if opt.causal else 3), rate=1, pad='SAME')

    # parameter initialize
    w = init.he_uniform('W', (1, opt.size, opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    if opt.causal:
        # pre-padding for causality
        if opt.pad == 'SAME':
            pad_len = (opt.size - 1) * opt.rate  # padding size
            x = tf.pad(tensor, [[0, 0], [pad_len, 0], [0, 0]]).sg_expand_dims(dim=1)
        else:
            x = tensor.sg_expand_dims(dim=1)
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(x, w, rate=opt.rate, padding='VALID') + (b if opt.bias else 0)
    else:
        # apply 2d convolution
        out = tf.nn.atrous_conv2d(tensor.sg_expand_dims(dim=1), w,
                                  rate=opt.rate, padding=opt.pad) + (b if opt.bias else 0)

    # reduce dimension
    out = out.sg_squeeze(dim=1)

    return out

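# A NumPy sketch (illustrative, not library code) of why pad_len = (size - 1) * rate
# makes the dilated convolution causal: after left-padding, output step t only sees
# inputs at t, t - rate, ..., t - (size - 1) * rate. The sequence and kernel below
# are toy values chosen for the demonstration.
import numpy as np

size, rate = 2, 4
x = np.arange(1, 11, dtype=float)            # a single 1-D sequence
pad_len = (size - 1) * rate
xp = np.concatenate([np.zeros(pad_len), x])  # pre-padding for causality
w = np.array([1.0, 1.0])                     # toy kernel of width `size`
out = np.array([sum(w[k] * xp[t + k * rate] for k in range(size)) for t in range(len(x))])
print(out)  # out[t] = x[t] + x[t - rate] (zero contribution where t - rate < 0)
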
def sg_dense(tensor, opt):
    r"""Applies a fully connected transformation.

    Args:
      tensor: A 2-D `Tensor`.
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # parameter initialize
    w = init.he_uniform('W', (opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply transform
    out = tf.matmul(tensor, w) + (b if opt.bias else 0)

    return out

def sg_conv(tensor, opt):
    r"""Applies a 2-D convolution.

    Args:
      tensor: A 4-D `Tensor`.
      size: A tuple or list of integers of length 2 representing
        `[kernel height, kernel width]`. Can be an int if both values are the same.
        If not specified, (3, 3) is set implicitly.
      stride: A tuple or list of integers of length 2 or 4 representing stride dimensions.
        If the length is 2, i.e., (a, b), the stride is `[1, a, b, 1]`.
        If the length is 4, i.e., (a, b, c, d), the stride is `[a, b, c, d]`.
        Can also be an int, i.e., a, in which case the stride is `[1, a, a, 1]`.
        The default value is [1, 1, 1, 1].
      in_dim: An `integer`. The size of input dimension.
      dim: An `integer`. The size of output dimension.
      pad: Either `SAME` (Default) or `VALID`.
      bias: Boolean. If True, biases are added.

    Returns:
      A `Tensor` with the same type as `tensor`.
    """
    # default options
    opt += tf.sg_opt(size=(3, 3), stride=(1, 1, 1, 1), pad='SAME')
    opt.size = opt.size if isinstance(opt.size, (tuple, list)) else [opt.size, opt.size]
    opt.stride = opt.stride if isinstance(opt.stride, (tuple, list)) else [1, opt.stride, opt.stride, 1]
    opt.stride = [1, opt.stride[0], opt.stride[1], 1] if len(opt.stride) == 2 else opt.stride

    # parameter initialize
    w = init.he_uniform('W', (opt.size[0], opt.size[1], opt.in_dim, opt.dim))
    if opt.bias:
        b = init.constant('b', opt.dim)

    # apply convolution
    out = tf.nn.conv2d(tensor, w, strides=opt.stride, padding=opt.pad) + (b if opt.bias else 0)

    return out

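# A plain-Python restatement (illustrative only) of how the stride option above is
# normalized to the 4-element form [1, a, b, 1] expected by tf.nn.conv2d:
def normalize_stride(stride):
    if not isinstance(stride, (tuple, list)):
        stride = [1, stride, stride, 1]
    if len(stride) == 2:
        stride = [1, stride[0], stride[1], 1]
    return list(stride)

print(normalize_stride(2))             # [1, 2, 2, 1]
print(normalize_stride((2, 3)))        # [1, 2, 3, 1]
print(normalize_stride((1, 2, 2, 1)))  # [1, 2, 2, 1]
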
def sg_lstm(tensor, opt):
    r"""Applies an LSTM.

    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, only the outputs in the last time step are returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will be
      [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(ln(tf.matmul(x, w_f) + tf.matmul(h, u_f) + (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(ln(tf.matmul(x, w_i) + tf.matmul(h, u_i) + (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(ln(tf.matmul(x, w_c) + tf.matmul(h, u_c) + (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(ln(tf.matmul(x, w_o) + tf.matmul(h, u_o) + (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out

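# A compact NumPy sketch (illustrative; not the library's code path) of the single
# LSTM step implemented above, without layer normalization. Shapes and the toy
# parameter dictionary are assumptions chosen for the demonstration.
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def lstm_step(x, h, c, p):
    f = sigmoid(x @ p['w_f'] + h @ p['u_f'] + p['b_f'])   # forget gate
    i = sigmoid(x @ p['w_i'] + h @ p['u_i'] + p['b_i'])   # input gate
    cc = np.tanh(x @ p['w_c'] + h @ p['u_c'] + p['b_c'])  # new cell candidate
    o = sigmoid(x @ p['w_o'] + h @ p['u_o'] + p['b_o'])   # out gate
    c = f * c + i * cc                                     # cell update
    return o * np.tanh(c), c                               # hidden output, new cell

in_dim, dim = 3, 4
p = {k: np.random.randn(in_dim, dim) * 0.1 for k in ('w_f', 'w_i', 'w_c', 'w_o')}
p.update({k: np.eye(dim) for k in ('u_f', 'u_i', 'u_c', 'u_o')})
p.update({k: np.zeros(dim) for k in ('b_f', 'b_i', 'b_c', 'b_o')})
p['b_o'] = np.ones(dim)  # mirrors b_o being initialized to 1 above
h, c = np.zeros((2, dim)), np.zeros((2, dim))
h, c = lstm_step(np.random.randn(2, in_dim), h, c, p)
print(h.shape, c.shape)  # (2, 4) (2, 4)
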
def sg_gru(tensor, opt):
    r"""Applies a GRU.

    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, only the outputs in the last time step are returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will be
      [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * h, u_h) + (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out

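# A matching NumPy sketch (illustrative only) of one GRU step as written above; note
# the blend y = (1 - z) * h + z * hh, i.e. z gates how much of the new candidate is
# taken. Parameter names and shapes below are assumptions for the demonstration.
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def gru_step(x, h, p):
    z = sigmoid(x @ p['w_z'] + h @ p['u_z'] + p['b_z'])         # update gate
    r = sigmoid(x @ p['w_r'] + h @ p['u_r'] + p['b_r'])         # reset gate
    hh = np.tanh(x @ p['w_h'] + (r * h) @ p['u_h'] + p['b_h'])  # candidate state
    return (1.0 - z) * h + z * hh                                # final output

in_dim, dim = 3, 4
p = {k: np.random.randn(in_dim, dim) * 0.1 for k in ('w_z', 'w_r', 'w_h')}
p.update({k: np.eye(dim) for k in ('u_z', 'u_r', 'u_h')})
p.update({k: np.zeros(dim) for k in ('b_z', 'b_r', 'b_h')})
h = gru_step(np.random.randn(2, in_dim), np.zeros((2, dim)), p)
print(h.shape)  # (2, 4)
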
def sg_gru(tensor, opt):
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * h, u_h) + (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out

def sg_gru(tensor, opt):
    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):
        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])
                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                    / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
                # apply offset, scale parameter
                return gamma * xx + beta
            else:
                return xx

        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0), opt))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0), opt))
        # h_hat
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * h, u_h) + (b_h if opt.bias else 0), opt))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out

def wrapper(tensor, **kwargs):
    import sg_initializer as init
    import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + _context

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0)
        assert not (opt.bn and opt.ln), \
            'only one of batch normalization and layer normalization is allowed.'
        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')
        # find existing layer names
        exist_layers = []
        for t in tf.get_collection(tf.GraphKeys.VARIABLES):
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + 'layers/' + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))
        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    # all layer variables start with 'layers/' prefix
    with tf.variable_scope('layers', reuse=opt.reuse):
        with tf.variable_scope(opt.name):

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # running mean, variance parameter
                mean_running = init.constant('mean', opt.dim)
                variance_running = init.constant('variance', opt.dim, value=1)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(out, axes=range(len(out.get_shape()) - 1))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [mean_running.assign(mean_running * decay + mean * (1 - decay)),
                                 variance_running.assign(variance_running * decay
                                                         + variance * (1 - decay))]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(_phase,
                               update_running_stat,  # updated running stat and batch mean, variance
                               lambda: (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply offset, scale parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase,
                              lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.reuse is None or not opt.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor, is_layer=True, name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

    return out

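# A plain-Python sketch (illustrative values only) of the running-statistics update
# used for batch normalization above: an exponential moving average with decay 0.99.
def update_running(running, batch_value, decay=0.99):
    return running * decay + batch_value * (1 - decay)

mean_running = 0.0
for batch_mean in [0.5, 0.6, 0.4, 0.55]:  # per-batch means seen during training
    mean_running = update_running(mean_running, batch_mean)
print(mean_running)  # slowly tracks the batch statistics as training proceeds
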
def sg_lstm(tensor, opt):
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(ln(tf.matmul(x, w_f) + tf.matmul(h, u_f) + (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(ln(tf.matmul(x, w_i) + tf.matmul(h, u_i) + (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(ln(tf.matmul(x, w_c) + tf.matmul(h, u_c) + (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(ln(tf.matmul(x, w_o) + tf.matmul(h, u_o) + (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out