def __init__(self, in_dim, dim, forget_bias=1.0, activation=tf.tanh, ln=True,
             bias=True, dtype=tf.float32, dev='/cpu:0', batch_size=3):
    """Stores the cell configuration and creates its variables under scope "lstm".

    Args:
      in_dim: Size of the input feature dimension.
      dim: Size of the state dimension.
      forget_bias: Stored on the instance; not otherwise used here.
      activation: Stored on the instance; not otherwise used here.
      ln: Accepted but not consulted -- `self._ln` is always set to False, so
        the "ln_rnn" beta/gamma variables below are never created.
      bias: When equal to True, a `w_b` variable is created.
      dtype: Stored as `self._dtype`; the variables are created with
        tf.float32 / tf.sg_floatx regardless of this value.
      dev: Device string handed to `tf.device`.
      batch_size: Leading dimension of the two zero state tensors.
    """
    self._in_dim, self._dim = in_dim, dim
    self._forget_bias = forget_bias
    self._activation = activation
    # NOTE(review): hard-coded to False; the `ln` argument is ignored.
    self._ln = False
    self._bias = bias
    self._dev = dev
    self._size = self._in_dim * self._dim
    self._initializer = tf.contrib.layers.xavier_initializer()
    self._dtype = dtype

    state_shape = (batch_size, self._dim)
    gate_width = 4 * self._dim

    def _xavier_variable(name, shape):
        # Each variable gets its own freshly built xavier initializer.
        return tf.get_variable(
            name, shape, dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(),
            trainable=True)

    with tf.device(self._dev), tf.variable_scope("lstm"):
        # The states are plain zero tensors, not variables.
        self.rnn_state = tf.zeros(state_shape, dtype=tf.sg_floatx)
        self.rnn_h = tf.zeros(state_shape, dtype=tf.sg_floatx)

        _xavier_variable('w_i2h', (self._in_dim, gate_width))
        _xavier_variable('w_h2h', (self._dim, gate_width))
        # Exact equality test kept on purpose (truthy non-True values skip it).
        if self._bias == True:
            _xavier_variable('w_b', (1, gate_width))

        if self._ln:
            with tf.variable_scope("ln_rnn"):
                for ln_name, ln_init in (('beta', 0.0), ('gamma', 1.0)):
                    tf.get_variable(
                        ln_name, self._dim, dtype=tf.sg_floatx,
                        initializer=tf.constant_initializer(ln_init),
                        trainable=True)
def sg_emb(**kwargs):
    r"""Returns an embedding layer or a look-up table.

    Args:
      name: A name for the layer (required).
      emb: A 2-D array (optional) used as the embedding matrix. A zero row is
        prepended to it, so it should hold one row fewer than the vocabulary.
      in_dim: Listed for API symmetry; not read by this function.
      dim: A positive `integer`. The embedding size. Mandatory when `emb` is
        None; when `emb` is given and `dim` is omitted it is taken from the
        last dimension of the external matrix.
      voca_size: A positive int32. Mandatory when `emb` is None.

    Returns:
      A 2-D tensor whose first row is all zeros (the padding entry), followed
      by the rows of the embedding matrix.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
        dim = opt.dim
    else:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)
        # Without this fallback an omitted `dim` reached tf.zeros as None.
        # assumes init.external returns a tensor/variable with a static shape
        dim = opt.dim if opt.dim is not None else w.get_shape().as_list()[-1]

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, dim), dtype=tf.sg_floatx), w])

    return emb
def sg_emb(**kwargs):
    r"""Returns a look-up table for embedding.

    kwargs:
      name: A name for the layer (mandatory).
      emb: A 2-D array (optional). When given it is used as the embedding
        matrix; otherwise a `[voca_size - 1, dim]` matrix is initialized.
        In both cases a row of 0's, associated with padding, is prepended.
      in_dim: Listed for API symmetry; not read by this function.
      dim: A positive `integer`. The size of output dimension. Mandatory when
        `emb` is None; inferred from the external matrix when `emb` is given
        and `dim` is omitted.
      voca_size: A positive integer. The size of vocabulary. Mandatory when
        `emb` is None.

    Returns:
      A 2-D `Tensor` of dtype `tf.sg_floatx`.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    dim = opt.dim
    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert dim is not None, 'dim is mandatory.'
        w = tf.sg_initializer.he_uniform(opt.name, (opt.voca_size - 1, dim))
    else:
        # use given embedding matrix
        w = tf.sg_initializer.external(opt.name, value=opt.emb)
        if dim is None:
            # Previously a missing `dim` was passed to tf.zeros as None.
            # assumes the external initializer returns a statically shaped tensor
            dim = w.get_shape().as_list()[-1]

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, dim), dtype=tf.sg_floatx), w])

    return emb
def sg_rnn(tensor, opt):
    r"""Applies a simple rnn.

    Args:
      tensor: A 3-D `Tensor`, indexed as [batch, time, in_dim].
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will be
      [batch size, dim].
    """
    # layer normalization (gamma/beta are only looked up when opt.ln is set)
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn: work on the slice passed in by the caller instead of
        # re-slicing `tensor` through the enclosing loop index
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop (statically unrolled over the time axis)
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
def sg_rnn(tensor, opt):
    r"""Applies a simple rnn over the time axis using `tf.scan`.

    Args:
      tensor: A 3-D `Tensor`, indexed as [batch, time, in_dim].
      opt:
        in_dim: Size of the input dimension.
        dim: Size of the output dimension.
        bias: If true, a bias `b` is added.
        ln: If true, layer normalization over the last axis is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is zeros.
        last_only: If true, only the last time step is returned.

    Returns:
      A `Tensor` indexed as [batch, time, dim], or the [batch, dim] slice of
      the last time step when `last_only` is set.
    """
    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state: compare against None explicitly -- a supplied state is a
    # tensor and must not be truth-tested
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # layer normalization
    def ln(v):
        # pass the value through untouched when normalization is disabled,
        # so gamma/beta are never needed in that case
        if not opt.ln:
            return v
        # calc layer mean, variance for final axis
        mean, variance = tf.nn.moments(v, axes=[len(v.get_shape()) - 1])
        # apply layer normalization ( explicit broadcasting needed )
        broadcast_shape = [-1] + [1] * (len(v.get_shape()) - 1)
        v = (v - tf.reshape(mean, broadcast_shape)) \
            / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
        # apply parameter
        return gamma * v + beta

    # step func
    def step(h, x):
        # apply transform
        return ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0))

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out
def q_process(t1, t2):
    '''
    Processes each training sample so that it fits in the queue.

    The number of zero entries in `t1` is counted and that many leading
    elements are dropped from both inputs (assumes zeros only occur as
    leading padding -- TODO confirm). Both are then left-padded with
    `Hyperparams.seqlen - 1` zeros, stacked, and a random window of length
    `Hyperparams.seqlen` is cropped from the pair.

    Returns the cropped `t1` window and the last element of the cropped
    `t2` window.
    '''
    # Strip as many leading items as there are zeros in t1
    n_zeros = tf.equal(t1, tf.zeros_like(t1)).sg_int().sg_sum()
    t1, t2 = t1[n_zeros:], t2[n_zeros:]

    # Zero pre-padding
    pad_len = Hyperparams.seqlen - 1
    t1 = tf.concat([tf.zeros([pad_len], tf.int32), t1], 0)
    t2 = tf.concat([tf.zeros([pad_len], tf.int32), t2], 0)

    # Random crop, applied jointly so both rows share the same window
    window = tf.random_crop(tf.stack((t1, t2)), [2, Hyperparams.seqlen])

    return window[0], window[1][-1]
def sg_rnn(tensor, opt):
    r"""Applies a simple recurrent layer, unrolled over the time axis.

    Each step computes `ln(x * W + h * U + b)`, where `ln` is layer
    normalization when `opt.ln` is set and the identity otherwise.

    Args:
      tensor: A 3-D `Tensor`, indexed as [batch, time, in_dim].
      opt:
        in_dim: Size of the input dimension.
        dim: Size of the output dimension.
        bias: If true, a bias `b` is added.
        ln: If true, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is zeros.
        last_only: If true, only the last time step is returned.

    Returns:
      A `Tensor`: all steps concatenated along axis 1, or the squeezed last
      step when `last_only` is set.
    """
    # layer normalization (gamma/beta are only looked up when opt.ln is set)
    def _maybe_ln(v):
        return _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # Use the slice passed in as `x`; the previous body ignored it and
        # re-read `tensor[:, i, :]` through the enclosing loop variable.
        return _maybe_ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0))

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
def sg_emb(**kwargs):
    r"""Builds an embedding look-up table with a leading zero (padding) row.

    Keyword Args:
      name: Name for the layer (mandatory).
      emb: Optional 2-D array used as the embedding matrix. When None, a
        `[voca_size - 1, dim]` matrix is initialized instead.
      dim: Embedding size. Mandatory when `emb` is None; when `emb` is given
        and `dim` is omitted it is read from the external matrix.
      voca_size: Vocabulary size. Mandatory when `emb` is None.

    Returns:
      A 2-D tensor: one row of zeros followed by the embedding matrix.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is not None:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)
        # An omitted `dim` used to be forwarded to tf.zeros as None.
        # assumes init.external returns a tensor/variable with a static shape
        pad_width = opt.dim if opt.dim is not None else w.get_shape().as_list()[-1]
    else:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
        pad_width = opt.dim

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, pad_width), dtype=tf.sg_floatx), w])

    return emb
def trainIt():
    """Builds the generator/discriminator graph and runs the training loop.

    Only the real-sample discriminator loss is optimized by the loop below;
    the generator optimizer is created but never run.
    """
    data = prepareData()
    train_set = data['train']
    x = train_set[0]

    # generator fed with random noise
    z = tf.random_normal((batch_size, rand_dim))
    gen = generator(z)

    # discriminator on real and generated samples
    disc_real = discriminator(x)
    disc_fake = discriminator(gen)

    # losses
    loss_d_r = disc_real.sg_mse(target=data['train'][1], name='disc_real')
    loss_d_f = disc_fake.sg_mse(target=tf.zeros(batch_size), name='disc_fake')
    loss_d = (loss_d_r + loss_d_f) / 2
    loss_g = disc_fake.sg_mse(target=tf.ones(batch_size), name='gen')

    # train ops: the discriminator op minimizes the real-sample loss only
    train_disc = tf.sg_optim(loss_d_r, lr=0.01, name='train_disc',
                             category='discriminator')
    # generator train op (built, but not executed by alt_train)
    train_gen = tf.sg_optim(loss_g, lr=0.01, category='generator')

    @tf.sg_train_func
    def alt_train(sess, opt):
        step_no = sess.run(tf.sg_global_step())
        # NOTE(review): `% 1` is always 0, so the evaluation-only branch
        # below is never taken.
        if step_no % 1 != 0:
            l_disc = sess.run(loss_d)
        else:
            # training discriminator
            l_disc = sess.run([loss_d_r, train_disc])[0]
        return np.mean(l_disc)

    alt_train(log_interval=10,
              max_ep=25,
              ep_size=(1100 + 690) / batch_size,
              early_stop=False,
              save_dir='asset/train/gan',
              save_interval=10)
def zero_state(self, batch_size):
    """Returns a pair of all-zero state tensors.

    Each tensor has shape (batch_size, self._seqlen, self._dim) and dtype
    `tf.sg_floatx`.
    """
    shape = (batch_size, self._seqlen, self._dim)
    first = tf.zeros(shape, dtype=tf.sg_floatx)
    second = tf.zeros(shape, dtype=tf.sg_floatx)
    return (first, second)
def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses):
    """One step over consecutive sentences: encode sentence `time`, decode `time + 1`.

    The returned tuple has the same arity and order as the parameters, with
    `time` advanced by one (presumably the body of a `tf.while_loop` --
    TODO confirm against the caller).

    Names such as `x_sent`, `emb_x`, `emb_y`, `num_blocks`, `reu_vars`,
    `crnn_cell`, `rnn_cell`, `Hp` and `self` are resolved from the enclosing
    scope, which is not visible here.

    Args:
      time: Index passed to `x_sent.read`.
      subrec1, subrec2: States carried between steps for the first and
        second attention quasi-RNN layers.
      rnn_state, rnn_h: State pair of `rnn_cell`.
      crnn_state, crnn_h: State pair of `crnn_cell`.
      losses: Running loss; this step's mean cross entropy is added to it.

    Returns:
      (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses)
    """
    x = x_sent.read(time)
    y = x_sent.read(time + 1)  # (batch, sentlen) = (16, 200)

    # shift target by one step for training source
    y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]], 1)

    # NOTE(review): `time == tf.constant(0)` is evaluated by Python here; if
    # `==` on tensors is an identity comparison in the TF version in use,
    # this always falls through to `reu_vars` -- confirm.
    reuse_vars = time == tf.constant(0) or reu_vars

    # -------------------------- BYTENET ENCODER --------------------------

    # embed table lookup
    enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, latentdim)
    # loop dilated conv block
    for i in range(num_blocks):
        enc = (enc.sg_res_block(
            size=5, rate=1, name="enc1_%d" % (i),
            reuse_vars=reuse_vars).sg_res_block(
                size=5, rate=2, name="enc2_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5, rate=4, name="enc4_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5, rate=8, name="enc8_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5, rate=16, name="enc16_%d" % (i),
                            reuse_vars=reuse_vars))

    # -------------------------- QCNN + QPOOL ENCODER with attention #1 --------------------------

    # quasi cnn layer ZFO [batch * 3, t, dim2 ]
    conv = enc.sg_quasi_conv1d(is_enc=True, size=3, name="qconv_1", reuse_vars=reuse_vars)
    # attention layer
    # recurrent layer # 1 + final encoder hidden state
    # the carried state is repeated Hp.maxlen times along a new axis 1
    subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    concat = conv.sg_concat(target=subrec1, axis=0)  # (batch*4, sentlen, latentdim)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_1", reuse_vars=reuse_vars)
    subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- QCNN + QPOOL ENCODER with attention #2 --------------------------

    # quasi cnn ZFO (batch*3, sentlen, latentdim)
    conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_2", reuse_vars=reuse_vars)
    # (batch, sentlen-duplicated, latentdim)
    subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    # (batch*4, sentlen, latentdim)
    concat = conv.sg_concat(target=subrec2, axis=0)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True,
                               name="qrnn_2", reuse_vars=reuse_vars)
    subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

    # residual block
    causal = False  # for encoder
    # reduce the channel dimension to Hp.hd / 2 before the conv LSTM
    crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
        name='relu_0', act='relu', bn=(not causal),
        ln=causal).sg_conv1d_gpus(name="dimred_0", size=1, dev="/cpu:0",
                                  reuse=reuse_vars, dim=Hp.hd / 2,
                                  act='relu', bn=(not causal), ln=causal))

    # conv LSTM
    with tf.variable_scope("mem/clstm") as scp:
        (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                         size=5, reuse_vars=reuse_vars)
    # dimension recover and residual connection
    rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
        .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

    # -------------------------- QCNN + QPOOL ENCODER with attention #3 --------------------------

    # pooling for lstm input
    # quasi cnn ZFO (batch*3, sentlen, latentdim)
    conv = rnn_input0.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_3", reuse_vars=reuse_vars)
    pool = conv.sg_quasi_rnn(is_enc=True, att=False, name="qrnn_3", reuse_vars=reuse_vars)
    rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

    # recurrent block
    with tf.variable_scope("mem/lstm") as scp:
        (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))
    # residual sum of LSTM output and input, repeated Hp.maxlen times on axis 1
    rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])

    # -------------------------- BYTENET DECODER --------------------------

    # CNN decoder
    dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")
    for i in range(num_blocks):
        dec = (dec.sg_res_block(
            size=3, rate=1, causal=True, name="dec1_%d" % (i),
            reuse_vars=reuse_vars).sg_res_block(
                size=3, rate=2, causal=True, name="dec2_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=4, causal=True, name="dec4_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3, rate=8, causal=True, name="dec8_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3, rate=16, causal=True, name="dec16_%d" % (i),
                            reuse_vars=reuse_vars))

    # final fully convolution layer for softmax
    dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False,
                             dev=self._dev, reuse=reuse_vars)

    ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
    cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

    # accumulate this step's loss into the running total
    losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

    return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses)
def __init__(self, mode="train"):
    """Builds the quasi-RNN encoder/decoder graph and stores every stage on `self`.

    Args:
      mode: "train" reads batches from `get_batch_data()` and adds the loss;
        any other value creates placeholders for `x` and `y_src` (inference).

    The shape comments below are the original author's annotations
    (apparently for Hp.bs=16, Hp.maxlen=150, Hp.hd=320); they are not
    verified by anything visible here.
    """
    # Inputs and Labels
    if mode == "train":
        self.x, self.y, self.num_batch = get_batch_data(
        )  # (16, 150) int32, (16, 150) int32, int
        # decoder input: targets shifted right by one, with a leading zero column
        self.y_src = tf.concat(
            axis=1,
            values=[tf.zeros((Hp.bs, 1), tf.int32), self.y[:, :-1]])  # (16, 150) int32
    else:  # inference
        self.x = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))

    # Load vocabulary
    self.char2idx, self.idx2char = load_vocab()

    # Embedding
    self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(self.char2idx), dim=Hp.hd)  # (179, 320)
    self.emb_y = tf.sg_emb(name='emb_y', voca_size=len(self.char2idx), dim=Hp.hd)  # (179, 320)
    self.X = self.x.sg_lookup(emb=self.emb_x)  # (16, 150, 320)
    self.Y = self.y_src.sg_lookup(emb=self.emb_y)  # (16, 150, 320)

    # Encoding: four conv + pooling layers. After each pooling the rows from
    # index Hp.bs onward are kept for the decoder (H_zfo*); `self.conv` and
    # `self.pool` are overwritten by every layer.
    self.conv = self.X.sg_quasi_conv1d(is_enc=True, size=6)  # (16*4, 150, 320)
    self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
    self.H_zfo1 = self.pool[Hp.bs:]  # (16*3, 15, 320) for decoding

    self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*4, 150, 320)
    self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
    self.H_zfo2 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

    self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*4, 150, 320)
    self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
    self.H_zfo3 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

    self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*4, 150, 320)
    self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
    # the last layer also keeps the first Hp.bs rows, used by the attention step
    self.H4 = self.pool[:Hp.bs]
    self.H_zfo4 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

    # Decoding: each layer concatenates the matching encoder slice along axis 0
    self.dec = self.Y.sg_concat(target=self.H_zfo1, dim=0)
    self.d_conv = self.dec.sg_quasi_conv1d(is_enc=False, size=2)
    self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False)  # (16*4, 150, 320)

    self.d_conv = (self.d_pool.sg_concat(
        target=self.H_zfo2,
        dim=0).sg_quasi_conv1d(is_enc=False, size=2))
    self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False)  # (16*4, 150, 320)

    self.d_conv = (self.d_pool.sg_concat(
        target=self.H_zfo3, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
    self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False)  # (16*4, 150, 320)

    self.d_conv = (self.d_pool.sg_concat(
        target=self.H_zfo4, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
    # final pooling runs with att=True on H4 stacked with the decoder conv output
    self.concat = self.H4.sg_concat(target=self.d_conv, dim=0)
    self.d_pool = self.concat.sg_quasi_rnn(is_enc=False, att=True)  # (16*4, 150, 320)

    self.logits = self.d_pool.sg_conv1d(size=1, dim=len(self.char2idx), act="linear")  # (16, 150, 179)
    # predictions are built in both modes
    self.preds = self.logits.sg_argmax()

    if mode == 'train':
        # cross entropy loss with logits ( for training set )
        self.loss = self.logits.sg_ce(target=self.y, mask=True)
        # 1.0 where the target id is non-zero, 0.0 elsewhere
        self.istarget = tf.not_equal(self.y, 0).sg_float()
        # summed loss divided by the count of non-zero targets (epsilon avoids 0/0)
        self.reduced_loss = (self.loss.sg_sum()) / (
            self.istarget.sg_sum() + 0.00001)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def tower_loss2_old(xx, scope, reuse_vars=False):
    """Builds the per-tower loss over consecutive sentence pairs (legacy version).

    For every pair (sentence k, sentence k + 1) taken from `xx` along axis 1,
    sentence k is encoded and sentence k + 1 is decoded; the mean cross
    entropy of each pair is added to the 'losses' collection and the
    collection entries for `scope` are summed.

    `Hp`, `num_blocks`, `LSTMCell`, `ConvLSTMCell`, `subrec_zero_state` and
    `self` are resolved from the enclosing scope, which is not visible here.

    Args:
      xx: Tensor unstacked along axis 1 into one tensor per sentence.
      scope: Scope used to filter the 'losses' collection.
      reuse_vars: Forwarded as the reuse flag of the created layers.

    Returns:
      The sum of all collected losses for this tower.
    """
    # make embedding matrix for source and target
    with tf.variable_scope('embs', reuse=reuse_vars):
        emb_x = tf.sg_emb(name='emb_x', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)

    x_sents = tf.unstack(xx, axis=1)  # each element is (batch, sentlen)

    # generate first an unconditioned sentence
    n_input = Hp.hd

    subrec1 = subrec_zero_state(Hp.bs, Hp.hd)
    subrec2 = subrec_zero_state(Hp.bs, Hp.hd)

    rnn_cell = LSTMCell(in_dim=n_input, dim=Hp.hd)
    (rnn_state, rnn_h) = rnn_cell.zero_state(Hp.bs)

    crnn_cell = ConvLSTMCell(in_dim=n_input, dim=Hp.hd)
    # NOTE(review): zero_state receives n_input here, whereas the LSTM cell
    # above receives Hp.bs -- confirm which one is intended.
    (crnn_state, crnn_h) = crnn_cell.zero_state(n_input)

    for sent in range(len(x_sents) - 1):
        # Index with the outer loop variable. The previous code used `i`,
        # which is unbound on the first pass and afterwards holds the last
        # value of the inner block loops.
        y = x_sents[sent + 1]
        x = x_sents[sent]  # (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.bs, 1), tf.sg_intx), y[:, :-1]], 1)

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, dim1)

        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5, rate=2, name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5, rate=4, name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5, rate=8, name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5, rate=16, name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))

        # quasi rnn layer [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True, size=2, name="conv1", reuse_vars=reuse_vars)

        # attention layer
        # recurrent layer # 1 + final encoder hidden state
        # NOTE(review): `concat` is built but never consumed.
        concat = subrec1.sg_concat(target=conv, dim=0)
        subrec1 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # NOTE(review): `pool` is not defined anywhere in this function, so
        # this line raises NameError as written; the intended source tensor
        # cannot be determined from this code.
        conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="conv2", reuse_vars=reuse_vars)
        concat = subrec2.sg_concat(target=conv, dim=0)
        subrec2 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # conv LSTM
        (crnn_state, crnn_h) = crnn_cell(subrec2, (crnn_state, crnn_h), 5)

        # recurrent block
        (rnn_state, rnn_h) = rnn_cell(crnn_h, (rnn_state, rnn_h))

        # CNN decoder
        dec = crnn_h.sg_concat(target=y_src.sg_lookup(emb=emb_y), name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3, rate=1, causal=True, name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3, rate=2, causal=True, name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3, rate=4, causal=True, name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3, rate=8, causal=True, name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3, rate=16, causal=True, name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False,
                                 dev=self._dev, reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    return total_loss
def sg_gru(tensor, opt):
    r"""Applies a GRU over the time axis using `tf.scan`.

    Args:
      tensor: A 3-D `Tensor`, indexed as [batch, time, in_dim].
      opt:
        in_dim: Size of the input dimension.
        dim: Size of the output dimension.
        bias: If true, biases `b_z`, `b_r`, `b_h` are added.
        ln: If true, layer normalization over the last axis is applied to
          every gate pre-activation.
        init_state: A 2-D `Tensor`. If None, the initial state is zeros.
        last_only: If true, only the last time step is returned.

    Returns:
      A `Tensor` indexed as [batch, time, dim], or the [batch, dim] slice of
      the last time step when `last_only` is set.
    """
    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state: compare against None explicitly -- a supplied state is a
    # tensor and must not be truth-tested
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # layer normalization
    def ln(v):
        # pass the value through untouched when normalization is disabled,
        # so gamma/beta are never needed in that case
        if not opt.ln:
            return v
        # calc layer mean, variance for final axis
        mean, variance = tf.nn.moments(v, axes=[len(v.get_shape()) - 1])
        # apply layer normalization ( explicit broadcasting needed )
        broadcast_shape = [-1] + [1] * (len(v.get_shape()) - 1)
        v = (v - tf.reshape(mean, broadcast_shape)) \
            / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
        # apply parameter
        return gamma * v + beta

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0)))
        # h_hat
        # NOTE(review): the candidate state uses sigmoid; left unchanged
        # because switching the activation would alter existing models.
        hh = tf.sigmoid(ln(tf.matmul(x, w_h) + tf.matmul(r * h, u_h) + (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out
# inputs # # ComTrans parallel corpus input tensor ( with QueueRunner ) data = ComTrans(batch_size=batch_size) # source, target sentence x, y = data.source, data.target voca_size = data.voca_size # make embedding matrix for source and target emb_x = tf.sg_emb(name='emb_x', voca_size=voca_size, dim=latent_dim) emb_y = tf.sg_emb(name='emb_y', voca_size=voca_size, dim=latent_dim) # shift target for training source y_src = tf.concat(1, [tf.zeros((batch_size, 1), tf.sg_intx), y[:, :-1]]) # residual block @tf.sg_sugar_func def sg_res_block(tensor, opt): # default rate opt += tf.sg_opt(size=3, rate=1, causal=False) # input dimension in_dim = tensor.get_shape().as_list()[-1] # reduce dimension input_ = (tensor.sg_bypass(act='relu', bn=(not opt.causal), ln=opt.causal).sg_conv1d(size=1, dim=in_dim / 2,
def sg_gru(tensor, opt):
    r"""Applies a GRU, statically unrolled over the time axis.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.
        mask: Boolean 2-D `Tensor` or None(default). Outputs are multiplied
          by the mask, so masked-out positions become 0.
        summary: Forwarded to the parameter initializers.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape
      [batch size, dim]. Otherwise, [batch size, time steps, dim].
    """
    gates = ('z', 'r', 'h')

    # layer normalization (gamma/beta are only looked up when opt.ln is set)
    def _norm(v):
        return _ln_rnn(v, gamma, beta) if opt.ln else v

    # parameters, created gate by gate: W_z, U_z, W_r, U_r, W_h, U_h
    w, u = {}, {}
    for g in gates:
        w[g] = tf.sg_initializer.orthogonal('W_' + g, (opt.in_dim, opt.dim), summary=opt.summary)
        u[g] = tf.sg_initializer.identity('U_' + g, opt.dim, summary=opt.summary)

    # biases fall back to the scalar 0 when disabled
    b = dict.fromkeys(gates, 0)
    if opt.bias:
        for g in gates:
            b[g] = tf.sg_initializer.constant('b_' + g, opt.dim, summary=opt.summary)

    # layer normalization parameters ( offset, scale )
    if opt.ln:
        beta = tf.sg_initializer.constant('beta', opt.dim, summary=opt.summary)
        gamma = tf.sg_initializer.constant('gamma', opt.dim, value=1, summary=opt.summary)

    def step(prev, x):
        # update gate
        z = tf.sigmoid(_norm(tf.matmul(x, w['z']) + tf.matmul(prev, u['z']) + b['z']))
        # reset gate
        r = tf.sigmoid(_norm(tf.matmul(x, w['r']) + tf.matmul(prev, u['r']) + b['r']))
        # candidate state
        h_hat = tf.tanh(_norm(tf.matmul(x, w['h']) + tf.matmul(r * prev, u['h']) + b['h']))
        # blend candidate and previous state
        return (1. - z) * h_hat + z * prev

    # initial state
    if opt.init_state is not None:
        state = opt.init_state
    else:
        state = tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # unroll over the time axis
    frames = []
    for t in range(tensor.get_shape().as_list()[1]):
        state = step(state, tensor[:, t, :])
        # noinspection PyUnresolvedReferences
        frames.append(state.sg_expand_dims(axis=1))

    # merge tensor
    out = tf.concat(frames, 1)

    if opt.mask is not None:
        # zero out masked positions
        out *= opt.mask.sg_expand_dims(axis=2).sg_float()
        if not opt.last_only:
            return out
        # sequence length from the mask; reversing each sequence brings its
        # last valid output to index 0
        seq_len = opt.mask.sg_int().sg_sum(axis=1)
        rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
        return rev[:, 0, :]

    return out[:, -1, :] if opt.last_only else out
def sg_lstm(tensor, opt):
    r"""Applies an LSTM, statically unrolled over the time axis.

    Args:
      tensor: A 3-D `Tensor`, indexed as [batch, time, in_dim].
      opt:
        in_dim: Size of the input dimension.
        dim: Size of the output dimension.
        bias: If true, per-gate biases are added (`b_o` is created with value=1).
        ln: If true, layer normalization is applied to every gate.
        init_state: A 2-D `Tensor` used for BOTH the initial hidden and the
          initial cell state. If None, zeros are used.
        last_only: If true, only the last time step is returned.

    Returns:
      A `Tensor`: all hidden states concatenated along axis 1, or the
      squeezed last hidden state when `last_only` is set.
    """
    # layer normalization (gamma/beta are only looked up when opt.ln is set)
    def _ln(v):
        return _ln_rnn(v, gamma, beta) if opt.ln else v

    # one gate: squash(ln(x * W + h * U + b))
    def _gate(squash, x, h, w, u, b):
        return squash(_ln(tf.matmul(x, w) + tf.matmul(h, u) + b))

    # step func
    def step(h, c, x):
        forget = _gate(tf.sigmoid, x, h, w_f, u_f, b_f)
        write = _gate(tf.sigmoid, x, h, w_i, u_i, b_i)
        candidate = _gate(tf.tanh, x, h, w_c, u_c, b_c)
        expose = _gate(tf.sigmoid, x, h, w_o, u_o, b_o)
        # cell update
        cell = forget * c + write * candidate
        # final output
        return expose * tf.tanh(cell), cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)
    else:
        # scalar 0 stands in for every disabled bias
        b_i = b_f = b_o = b_c = 0

    # layer normalization parameters ( offset, scale )
    if opt.ln:
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    if opt.init_state is not None:
        init_h = opt.init_state
    else:
        init_h = tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop; hidden and cell state start from the same tensor
    hidden, cell_state, out = init_h, init_h, []
    for t in range(tensor.get_shape().as_list()[1]):
        hidden, cell_state = step(hidden, cell_state, tensor[:, t, :])
        out.append(hidden.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        return out[-1].sg_squeeze(dim=1)
    return tf.concat(1, out)
def zero_state(self, batch_size):
    """Returns a pair of all-zero state tensors.

    Each tensor has shape (batch_size, self.state_size) and dtype
    `tf.sg_floatx`.
    """
    shape = (batch_size, self.state_size)
    first = tf.zeros(shape, dtype=tf.sg_floatx)
    second = tf.zeros(shape, dtype=tf.sg_floatx)
    return (first, second)
# hyper parameters # batch_size = 16 # batch size # # inputs # # ComTrans parallel corpus input tensor ( with QueueRunner ) data = ComTrans(batch_size=batch_size) # source, target sentence x, y = data.source, data.target # shift target for training source y_in = tf.concat([tf.zeros((batch_size, 1), tf.sg_intx), y[:, :-1]], axis=1) # vocabulary size voca_size = data.voca_size # make embedding matrix for source and target emb_x = tf.sg_emb(name='emb_x', voca_size=voca_size, dim=latent_dim) emb_y = tf.sg_emb(name='emb_y', voca_size=voca_size, dim=latent_dim) # latent from embed table z_x = x.sg_lookup(emb=emb_x) z_y = y_in.sg_lookup(emb=emb_y) # encode graph ( atrous convolution ) enc = encode(z_x) # concat merge target source
def __init__(self, mode="train"):
    """Builds the quasi-RNN encoder/decoder graph.

    Args:
      mode: "train" reads batches from `get_batch_data()` and defines
        `self.loss` / `self.reduced_loss`; any other value creates
        placeholders for `self.x` / `self.y_src` and defines `self.preds`.
    """
    # Inputs and Labels
    if mode == "train":
        self.x, self.y, self.num_batch = get_batch_data()
        # decoder input: targets shifted right by one, with a leading zero column
        go_column = tf.zeros((Hp.batch_size, 1), tf.int32)
        self.y_src = tf.concat([go_column, self.y[:, :-1]], 1)
    else:  # inference
        self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))

    # Load vocabulary
    char2idx, _ = load_vocab()

    # Embedding: one trainable look-up table per variable scope
    def _lookup(ids, scope_name):
        with tf.variable_scope(scope_name):
            table = tf.get_variable('lookup_table',
                                    dtype=tf.float32,
                                    shape=[len(char2idx), Hp.hidden_units],
                                    initializer=tf.truncated_normal_initializer())
        return tf.nn.embedding_lookup(table, ids)

    X = _lookup(self.x, 'X')
    Y = _lookup(self.y_src, 'Y')

    # Encoding: four conv + pooling layers. After each pooling the rows from
    # index Hp.batch_size onward are kept for the matching decoder layer.
    pooled, skips = X, []
    for width in (6, 2, 2):
        pooled = (pooled.sg_quasi_conv1d(is_enc=True, size=width)
                  .sg_quasi_rnn(is_enc=True, att=False))
        skips.append(pooled[Hp.batch_size:])

    pooled = (pooled.sg_quasi_conv1d(is_enc=True, size=2)
              .sg_quasi_rnn(is_enc=True, att=False))
    # the last layer also keeps the first Hp.batch_size rows for attention
    H4 = pooled[:Hp.batch_size]
    skips.append(pooled[Hp.batch_size:])

    # Decoding: three plain layers, then a final attention layer
    dec = Y
    for skip in skips[:3]:
        dec = (dec.sg_concat(target=skip, axis=0)
               .sg_quasi_conv1d(is_enc=False, size=2)
               .sg_quasi_rnn(is_enc=False, att=False))

    d_conv = (dec.sg_concat(target=skips[3], axis=0)
              .sg_quasi_conv1d(is_enc=False, size=2))
    d_pool = H4.sg_concat(target=d_conv, axis=0).sg_quasi_rnn(is_enc=False, att=True)

    logits = d_pool.sg_conv1d(size=1, dim=len(char2idx), act="linear")

    if mode == 'train':
        # cross entropy loss with logits ( for training set )
        self.loss = logits.sg_ce(target=self.y, mask=True)
        # 1.0 where the target id is non-zero, 0.0 elsewhere
        istarget = tf.not_equal(self.y, 0).sg_float()
        # summed loss divided by the count of non-zero targets
        self.reduced_loss = (self.loss.sg_sum()) / (istarget.sg_sum() + 1e-8)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
    else:  # inference
        self.preds = logits.sg_argmax()
def sg_lstm(tensor, opt):
    r"""Runs an LSTM along axis 1 of a 3-D tensor.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is set to
          zeros. The same tensor seeds both the hidden and the cell state.
        last_only: Boolean. If True, only the last time step is returned.
        mask: Boolean 2-D `Tensor` or None (default). Outputs at False
          positions are multiplied by 0 after the loop has run.
        summary: Passed to every parameter initializer.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape
      [batch size, dim]. Otherwise, [batch size, time steps, dim].
    """
    sg_init = tf.sg_initializer
    in_shape = (opt.in_dim, opt.dim)

    # Parameters: input weights (W_*) and recurrent weights (U_*) per gate.
    w_i = sg_init.orthogonal('W_i', in_shape, summary=opt.summary)
    u_i = sg_init.identity('U_i', opt.dim, summary=opt.summary)
    w_f = sg_init.orthogonal('W_f', in_shape, summary=opt.summary)
    u_f = sg_init.identity('U_f', opt.dim, summary=opt.summary)
    w_o = sg_init.orthogonal('W_o', in_shape, summary=opt.summary)
    u_o = sg_init.identity('U_o', opt.dim, summary=opt.summary)
    w_c = sg_init.orthogonal('W_c', in_shape, summary=opt.summary)
    u_c = sg_init.identity('U_c', opt.dim, summary=opt.summary)

    if opt.bias:
        b_i = sg_init.constant('b_i', opt.dim, summary=opt.summary)
        b_f = sg_init.constant('b_f', opt.dim, summary=opt.summary)
        # Only the output-gate bias is created with value=1.
        b_o = sg_init.constant('b_o', opt.dim, value=1, summary=opt.summary)
        b_c = sg_init.constant('b_c', opt.dim, summary=opt.summary)
    else:
        # Without biases a literal 0 is added instead.
        b_i = b_f = b_o = b_c = 0

    if opt.ln:
        # Layer normalization offset and scale.
        beta = sg_init.constant('beta', opt.dim, summary=opt.summary)
        gamma = sg_init.constant('gamma', opt.dim, value=1, summary=opt.summary)

    def normalize(v):
        """Apply `_ln_rnn` when layer normalization is on, else pass through."""
        if opt.ln:
            return _ln_rnn(v, gamma, beta)
        return v

    def gate(x, h_prev, w, u, b, squash):
        """Return squash(normalize(x.w + h_prev.u + b))."""
        return squash(normalize(tf.matmul(x, w) + tf.matmul(h_prev, u) + b))

    def step(h_prev, c_prev, x):
        """Advance one time step; returns (output, new cell state)."""
        forget = gate(x, h_prev, w_f, u_f, b_f, tf.sigmoid)
        write = gate(x, h_prev, w_i, u_i, b_i, tf.sigmoid)
        candidate = gate(x, h_prev, w_c, u_c, b_c, tf.tanh)
        read = gate(x, h_prev, w_o, u_o, b_o, tf.sigmoid)
        c_next = forget * c_prev + write * candidate
        return read * tf.tanh(c_next), c_next

    static_shape = tensor.get_shape().as_list()

    # Initial state
    if opt.init_state is not None:
        init_h = opt.init_state
    else:
        init_h = tf.zeros((static_shape[0], opt.dim), dtype=tf.sg_floatx)

    # Unrolled loop over the statically known number of time steps.
    h, c = init_h, init_h
    frames = []
    for t in range(static_shape[1]):
        h, c = step(h, c, tensor[:, t, :])
        frames.append(h.sg_expand_dims(axis=1))
    out = tf.concat(frames, 1)

    if opt.mask is None:
        return out[:, -1, :] if opt.last_only else out

    # Zero the outputs at masked-out positions.
    out = out * opt.mask.sg_expand_dims(axis=2).sg_float()
    if not opt.last_only:
        return out

    # Sequence lengths come from the mask; reversing each sequence over its
    # own length moves its last valid output to index 0.
    seq_len = opt.mask.sg_int().sg_sum(axis=1)
    rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
    return rev[:, 0, :]
def sg_gru(tensor, opt):
    r"""Runs a GRU along axis 1 of a 3-D tensor.

    Args:
      tensor: A 3-D `Tensor`.
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, only the last time step is returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will
      be [batch size, dim].
    """
    in_shape = (opt.in_dim, opt.dim)

    # Parameters: input weights (W_*) and recurrent weights (U_*).
    w_z = init.orthogonal('W_z', in_shape)
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', in_shape)
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', in_shape)
    u_h = init.identity('U_h', opt.dim)

    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)
    else:
        # Without biases a literal 0 is added instead.
        b_z = b_r = b_h = 0

    if opt.ln:
        # Layer normalization offset and scale.
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    def normalize(v):
        """Apply `_ln_rnn` when layer normalization is on, else pass through."""
        if opt.ln:
            return _ln_rnn(v, gamma, beta)
        return v

    def step(h_prev, x):
        """Advance one time step and return the new hidden state."""
        update = tf.sigmoid(
            normalize(tf.matmul(x, w_z) + tf.matmul(h_prev, u_z) + b_z))
        reset = tf.sigmoid(
            normalize(tf.matmul(x, w_r) + tf.matmul(h_prev, u_r) + b_r))
        # The candidate sees the previous state scaled by the reset gate.
        proposal = tf.tanh(
            normalize(tf.matmul(x, w_h) + tf.matmul(reset * h_prev, u_h) + b_h))
        return (1. - update) * h_prev + update * proposal

    static_shape = tensor.get_shape().as_list()

    # Initial state
    if opt.init_state is not None:
        init_h = opt.init_state
    else:
        init_h = tf.zeros((static_shape[0], opt.dim), dtype=tf.sg_floatx)

    # Unrolled loop over the statically known number of time steps.
    h = init_h
    frames = []
    for t in range(static_shape[1]):
        h = step(h, tensor[:, t, :])
        frames.append(h.sg_expand_dims(dim=1))

    if opt.last_only:
        # Drop the time axis that was just added to the final frame.
        return frames[-1].sg_squeeze(dim=1)
    return tf.concat(1, frames)
def sg_lstm(tensor, opt):
    r"""Runs an LSTM along axis 1 of a 3-D tensor.

    Args:
      tensor: A 3-D `Tensor`.
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.
        init_state: A 2-D `Tensor`. If None, the initial state is set to
          zeros. The same tensor seeds both the hidden and the cell state.
        last_only: Boolean. If True, only the last time step is returned.

    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape
      [batch size, time steps, dim]. If last_only is True, the shape will
      be [batch size, dim].
    """
    in_shape = (opt.in_dim, opt.dim)

    # Parameters: input weights (W_*) and recurrent weights (U_*) per gate.
    w_i = init.orthogonal('W_i', in_shape)
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', in_shape)
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', in_shape)
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', in_shape)
    u_c = init.identity('U_c', opt.dim)

    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        # Only the output-gate bias is created with value=1.
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)
    else:
        # Without biases a literal 0 is added instead.
        b_i = b_f = b_o = b_c = 0

    if opt.ln:
        # Layer normalization offset and scale.
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    def normalize(v):
        """Apply `_ln_rnn` when layer normalization is on, else pass through."""
        if opt.ln:
            return _ln_rnn(v, gamma, beta)
        return v

    def gate(x, h_prev, w, u, b, squash):
        """Return squash(normalize(x.w + h_prev.u + b))."""
        return squash(normalize(tf.matmul(x, w) + tf.matmul(h_prev, u) + b))

    def step(h_prev, c_prev, x):
        """Advance one time step; returns (output, new cell state)."""
        forget = gate(x, h_prev, w_f, u_f, b_f, tf.sigmoid)
        write = gate(x, h_prev, w_i, u_i, b_i, tf.sigmoid)
        candidate = gate(x, h_prev, w_c, u_c, b_c, tf.tanh)
        read = gate(x, h_prev, w_o, u_o, b_o, tf.sigmoid)
        c_next = forget * c_prev + write * candidate
        return read * tf.tanh(c_next), c_next

    static_shape = tensor.get_shape().as_list()

    # Initial state
    if opt.init_state is not None:
        init_h = opt.init_state
    else:
        init_h = tf.zeros((static_shape[0], opt.dim), dtype=tf.sg_floatx)

    # Unrolled loop over the statically known number of time steps.
    h, c = init_h, init_h
    frames = []
    for t in range(static_shape[1]):
        h, c = step(h, c, tensor[:, t, :])
        frames.append(h.sg_expand_dims(dim=1))

    if opt.last_only:
        # Drop the time axis that was just added to the final frame.
        return frames[-1].sg_squeeze(dim=1)
    return tf.concat(1, frames)
def __init__(self, mode="train"):
    """Build the encoder/decoder graph and attach its endpoints to `self`.

    Args:
      mode: `"train"` pulls `x`, `y` and `num_batch` from
        `get_batch_data()` and defines `self.reduced_loss`. Any other
        value creates int32 placeholders of shape
        `(Hp.batch_size, Hp.maxlen)` for `self.x` and `self.y_src` and
        defines `self.preds`.

    Shape annotations in the comments below are carried over from the
    original author's notes (batch 16, length 150, 320 hidden units,
    179 symbols); they cannot be verified from this method alone.
    """
    training = mode == "train"
    batch = Hp.batch_size

    # Inputs and labels
    if training:
        # Annotated as (16, 150) int32, (16, 150) int32, int.
        self.x, self.y, self.num_batch = get_batch_data()
        # Decoder input: one zero column followed by `y` without its last
        # column, i.e. the targets shifted right by one position.
        first_column = tf.zeros((batch, 1), tf.int32)
        self.y_src = tf.concat([first_column, self.y[:, :-1]], 1)
    else:
        # Inference: both sequences are fed from outside.
        self.x = tf.placeholder(tf.int32, shape=(batch, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(batch, Hp.maxlen))

    # Vocabulary (only the char -> index map is used here).
    char2idx, _ = load_vocab()
    vocab_size = len(char2idx)

    # Embedding tables, annotated (179, 320), then the lookups,
    # annotated (16, 150, 320).
    emb_x = tf.sg_emb(name='emb_x', voca_size=vocab_size, dim=Hp.hidden_units)
    emb_y = tf.sg_emb(name='emb_y', voca_size=vocab_size, dim=Hp.hidden_units)
    X = self.x.sg_lookup(emb=emb_x)
    Y = self.y_src.sg_lookup(emb=emb_y)

    # Encoder: four conv -> pooling layers. After every pooling layer the
    # rows past the first `batch` are kept for the matching decoder layer
    # (annotated as (16*3, 150, 320)).
    enc = X.sg_quasi_conv1d(is_enc=True, size=6)
    enc = enc.sg_quasi_rnn(is_enc=True, att=False)
    H_zfo1 = enc[batch:]

    enc = enc.sg_quasi_conv1d(is_enc=True, size=2)
    enc = enc.sg_quasi_rnn(is_enc=True, att=False)
    H_zfo2 = enc[batch:]

    enc = enc.sg_quasi_conv1d(is_enc=True, size=2)
    enc = enc.sg_quasi_rnn(is_enc=True, att=False)
    H_zfo3 = enc[batch:]

    enc = enc.sg_quasi_conv1d(is_enc=True, size=2)
    enc = enc.sg_quasi_rnn(is_enc=True, att=False)
    H4 = enc[:batch]        # first `batch` rows, annotated (16, 150, 320)
    H_zfo4 = enc[batch:]

    # Decoder: the first three layers share one shape -- concatenate the
    # encoder slice on axis 0, convolve, pool without attention.
    dec = Y
    for enc_slice in (H_zfo1, H_zfo2, H_zfo3):
        dec = dec.sg_concat(target=enc_slice, axis=0)
        dec = dec.sg_quasi_conv1d(is_enc=False, size=2)
        dec = dec.sg_quasi_rnn(is_enc=False, att=False)

    # Last decoder layer: H4 is concatenated in front of the convolution
    # output and the pooling runs with att=True.
    dec = dec.sg_concat(target=H_zfo4, axis=0)
    dec = dec.sg_quasi_conv1d(is_enc=False, size=2)
    dec = H4.sg_concat(target=dec, axis=0)
    dec = dec.sg_quasi_rnn(is_enc=False, att=True)

    # Width-1 convolution onto the vocabulary; annotated (16, 150, 179).
    logits = dec.sg_conv1d(size=1, dim=vocab_size, act="linear")

    if training:
        # Cross entropy against the targets, summed and divided by the
        # number of non-zero target ids (plus a small constant).
        loss = logits.sg_ce(target=self.y, mask=True)
        istarget = tf.not_equal(self.y, 0).sg_float()
        loss_total = loss.sg_sum()
        target_count = istarget.sg_sum() + 0.00001
        self.reduced_loss = loss_total / target_count
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
    else:
        # Inference
        self.preds = logits.sg_argmax()
#
# inputs
#

# MNIST input pipeline ( with QueueRunner )
data = tf.sg_data.Mnist(batch_size=batch_size)

# training images and their labels
x = data.train.image
y = data.train.label

# per-example targets for the discriminator: all ones / all zeros
y_real = tf.ones(batch_size)
y_fake = tf.zeros(batch_size)

# discriminator labels ( half 1s, half 0s )
# NOTE(review): this concatenates the dataset labels `y` with `y * 0`, not
# `y_real` with `y_fake`, so the first half holds whatever `y` contains --
# confirm this is intended.
y_disc = tf.concat(0, [y, y * 0])

# categorical latent variable: one draw per example from equal logits
z_cat = tf.multinomial(
    tf.ones((batch_size, cat_dim), dtype=tf.sg_floatx) / cat_dim,
    1,
).sg_squeeze().sg_int()

# continuous latent variable
z_con = tf.random_normal((batch_size, con_dim))

# random latent variable dimension
z_rand = tf.random_normal((batch_size, rand_dim))

# full latent vector: one-hot category, continuous code and noise joined on axis 1
z = tf.concat(1, [z_cat.sg_one_hot(depth=cat_dim), z_con, z_rand])
def sg_gru(tensor, opt):
    r"""Runs a GRU along axis 1 of `tensor`.

    Args:
      tensor: Input `Tensor`, indexed as `tensor[:, i, :]` per step, so it
        is presumably [batch size, time steps, in_dim] -- confirm with the
        caller. Both leading dimensions must be statically known.
      opt:
        in_dim: First dimension of the input weight matrices.
        dim: Second dimension of the input weight matrices and size of
          the state.
        bias: If truthy, bias terms `b_z`, `b_r`, `b_h` are created and added.
        ln: If truthy, every pre-activation goes through `_ln_rnn` with
          `gamma` / `beta` parameters.
        init_state: Initial hidden state. If None, zeros of shape
          (static batch size, dim) are used.
        last_only: If truthy, only the final step's state is returned.

    Returns:
      The final hidden state when `opt.last_only` is truthy, otherwise all
      per-step states concatenated along axis 1.
    """
    weight_shape = (opt.in_dim, opt.dim)

    # Parameters: input weights (W_*) and recurrent weights (U_*).
    w_z = init.orthogonal('W_z', weight_shape)
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', weight_shape)
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', weight_shape)
    u_h = init.identity('U_h', opt.dim)

    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)
    else:
        # Without biases a literal 0 is added instead.
        b_z = b_r = b_h = 0

    if opt.ln:
        # Layer normalization offset and scale.
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    def normalize(pre_activation):
        """Apply `_ln_rnn` when layer normalization is on, else pass through."""
        if opt.ln:
            return _ln_rnn(pre_activation, gamma, beta)
        return pre_activation

    def step(state, x):
        """Advance one time step and return the new hidden state."""
        # update gate
        z = tf.sigmoid(normalize(tf.matmul(x, w_z) + tf.matmul(state, u_z) + b_z))
        # reset gate
        r = tf.sigmoid(normalize(tf.matmul(x, w_r) + tf.matmul(state, u_r) + b_r))
        # candidate state, computed from the reset-scaled previous state
        candidate = tf.tanh(
            normalize(tf.matmul(x, w_h) + tf.matmul(r * state, u_h) + b_h))
        # blend previous state and candidate
        return (1. - z) * state + z * candidate

    dims = tensor.get_shape().as_list()

    # Initial state
    if opt.init_state is not None:
        init_h = opt.init_state
    else:
        init_h = tf.zeros((dims[0], opt.dim), dtype=tf.sg_floatx)

    # Unrolled loop over the statically known number of time steps.
    h = init_h
    collected = []
    for idx in range(dims[1]):
        h = step(h, tensor[:, idx, :])
        collected.append(h.sg_expand_dims(dim=1))

    if opt.last_only:
        # Drop the time axis that was just added to the final frame.
        return collected[-1].sg_squeeze(dim=1)
    return tf.concat(1, collected)