def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True, regularizer=None, trainable=True):
    r"""Creates a variable whose initial value is a scaled orthogonal ndarray.

    The value is taken from the SVD of a random Gaussian matrix.
    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      shape: A tuple/list of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two entries are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: The dtype given to `tf.constant` for the initial value.
      summary: If True, `tf.sg_summary_param` is called on the variable.
      regularizer: Forwarded unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      The object returned by `tf.get_variable`.
    """
    n_rows = shape[0]
    n_cols = np.prod(shape[1:])
    gaussian = np.random.normal(0.0, 1.0, (n_rows, n_cols))
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)

    # with full_matrices=False exactly one factor has the flattened shape
    if left.shape == (n_rows, n_cols):
        basis = left
    else:
        basis = right
    basis = basis.reshape(shape)

    # build the initial value first, then the variable
    initial_value = tf.constant(scale * basis[:shape[0], :shape[1]], dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value,
                        regularizer=regularizer, trainable=trainable)

    # optional parameter summary
    if summary:
        tf.sg_summary_param(x)
    return x
def external(name, value, dtype=tf.sg_floatx, summary=True, regularizer=None, trainable=True):
    r"""Creates a variable whose initial value is `value`.

    For example,

    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      value: A constant value (or list) handed to `tf.constant`.
      dtype: The dtype given to `tf.constant`.
      summary: If True, `tf.sg_summary_param` is called on the variable.
      regularizer: Forwarded unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      The object returned by `tf.get_variable`.
    """
    # wrap the raw value as a constant initializer
    initial_value = tf.constant(value, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value,
                        regularizer=regularizer, trainable=trainable)

    # optional parameter summary
    if summary:
        tf.sg_summary_param(x)
    return x
def external(name, value, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable whose initial value is `value`.

    For example,

    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      value: A constant value (or list) handed to `tf.constant`.
      dtype: The dtype given to `tf.constant`.
      summary: If True (and the current variable scope is not in reuse
        mode), `tf.sg_summary_param` is called on the variable.

    Returns:
      The object returned by `tf.get_variable`.
    """
    initial_value = tf.constant(value, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value)

    # no summary when the scope is reusing variables or summaries are disabled
    if tf.get_variable_scope().reuse or not summary:
        return x
    tf.sg_summary_param(x)
    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Creates a variable initialised to `scale` times a `dim` x `dim` identity matrix.

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      dim: Size argument given to `np.eye`.
      scale: A Python scalar. The value placed on the diagonal.
      dtype: The dtype given to `tf.constant`.

    Returns:
      The object returned by `tf.get_variable`.
    """
    diagonal = np.eye(dim) * scale
    x = tf.get_variable(name, initializer=tf.constant(diagonal, dtype=dtype))

    # a summary is only registered when the scope is not reusing variables
    if tf.get_variable_scope().reuse:
        return x
    tf.sg_summary_param(x)
    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable initialised to a scaled identity matrix.

    For example,

    ```
    identity("identity", 3, 2)
    => [[2. 0. 0.]
        [0. 2. 0.]
        [0. 0. 2.]]
    ```

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      dim: An int. The size of the first and second dimension of the output.
      scale: A Python scalar. The value on the diagonal (default 1).
      dtype: The dtype given to `tf.constant`.
      summary: If True (and the current variable scope is not in reuse
        mode), `tf.sg_summary_param` is called on the variable.

    Returns:
      The object returned by `tf.get_variable`, initialised from a 2-D array.
    """
    eye_values = np.eye(dim) * scale
    initial_value = tf.constant(eye_values, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value)

    # no summary when the scope is reusing variables or summaries are disabled
    if tf.get_variable_scope().reuse or not summary:
        return x
    tf.sg_summary_param(x)
    return x
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable whose initial value is a scaled orthogonal ndarray.

    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      shape: A tuple/list of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two entries are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: The dtype given to `tf.constant` for the initial value.
      summary: If True (and the current variable scope is not in reuse
        mode), `tf.sg_summary_param` is called on the variable.

    Returns:
      The object returned by `tf.get_variable`.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    noise = np.random.normal(0.0, 1.0, flat_shape)
    u_mat, _, v_mat = np.linalg.svd(noise, full_matrices=False)

    # take the SVD factor that matches the flattened shape, then restore `shape`
    ortho = (u_mat if u_mat.shape == flat_shape else v_mat).reshape(shape)
    scaled = scale * ortho[:shape[0], :shape[1]]

    x = tf.get_variable(name, initializer=tf.constant(scaled, dtype=dtype))

    # no summary when the scope is reusing variables or summaries are disabled
    if tf.get_variable_scope().reuse or not summary:
        return x
    tf.sg_summary_param(x)
    return x
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Creates a variable initialised with a random, scaled orthogonal ndarray.

    See Saxe et al. 2014 `http://arxiv.org/pdf/1312.6120.pdf`

    Args:
      name: A string. The name of the new or existing variable.
      shape: A list or tuple of integers; `shape[0]` and `shape[1]` are
        both indexed, so at least two entries are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: The dtype given to `tf.constant` for the initial value.

    Returns:
      The object returned by `tf.get_variable`.
    """
    first_dim = shape[0]
    rest_dim = np.prod(shape[1:])
    target = (first_dim, rest_dim)

    sample = np.random.normal(0.0, 1.0, target)
    u_factor, _, v_factor = np.linalg.svd(sample, full_matrices=False)

    # whichever factor has the flattened target shape becomes the basis
    chosen = u_factor if u_factor.shape == target else v_factor
    chosen = chosen.reshape(shape)

    initial_value = tf.constant(scale * chosen[:shape[0], :shape[1]], dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value)

    # register a summary unless the scope is reusing variables
    reusing = tf.get_variable_scope().reuse
    if not reusing:
        tf.sg_summary_param(x)
    return x
def _data_to_tensor(data_list, batch_size, name=None):
    r"""Builds shuffled batch tensors from in-memory data.

    Each entry of `data_list` is wrapped in `tf.constant`, sliced with
    `tf.train.slice_input_producer` and batched with `tf.train.shuffle_batch`.

    Args:
      data_list: An iterable of values accepted by `tf.constant`.
      batch_size: An integer. Queue capacities are `batch_size * 10`.
      name: A name passed to both queue operations (optional).

    Returns:
      The result of `tf.train.shuffle_batch`.
    """
    capacity = batch_size * 10

    # one constant tensor per input array
    constants = []
    for data in data_list:
        constants.append(tf.constant(data))

    # slice the constants into single examples
    sliced = tf.train.slice_input_producer(constants, capacity=capacity, name=name)

    # shuffle and batch the sliced examples
    return tf.train.shuffle_batch(sliced, batch_size,
                                  capacity=capacity,
                                  min_after_dequeue=batch_size * 1,
                                  name=name)
def external(name, value, dtype=tf.sg_floatx):
    r"""Creates a variable whose initial value is `value`.

    Args:
      name: The name of the new or existing variable.
      value: A constant value handed to `tf.constant`.
      dtype: The dtype given to `tf.constant`.

    Returns:
      The object returned by `tf.get_variable`.
    """
    initial_value = tf.constant(value, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value)

    # a summary is only registered when the scope is not reusing variables
    if tf.get_variable_scope().reuse:
        return x
    tf.sg_summary_param(x)
    return x
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Creates a variable initialised with a random, scaled orthogonal ndarray.

    Follows Saxe et al. ( http://arxiv.org/pdf/1312.6120.pdf ).

    Args:
      name: The name of the new or existing variable.
      shape: A list or tuple of integers; `shape[0]` and `shape[1]` are
        both indexed, so at least two entries are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: The dtype given to `tf.constant` for the initial value.

    Returns:
      The object returned by `tf.get_variable`.
    """
    flat_shape = (shape[0], np.prod(shape[1:]))
    random_matrix = np.random.normal(0.0, 1.0, flat_shape)
    svd_u, _, svd_v = np.linalg.svd(random_matrix, full_matrices=False)

    # keep the factor whose shape equals the flattened shape
    if svd_u.shape == flat_shape:
        q = svd_u
    else:
        q = svd_v
    q = q.reshape(shape)

    scaled_q = scale * q[:shape[0], :shape[1]]
    x = tf.get_variable(name, initializer=tf.constant(scaled_q, dtype=dtype))

    # a summary is only registered when the scope is not reusing variables
    if tf.get_variable_scope().reuse:
        return x
    tf.sg_summary_param(x)
    return x
def _data_to_tensor(data_list, batch_size, name=None):
    r"""Builds shuffled batch tensors from in-memory data.

    Each entry of `data_list` is wrapped in `tf.constant`, sliced with
    `tf.train.slice_input_producer` and batched with `tf.train.shuffle_batch`.

    Args:
      data_list: An iterable of values accepted by `tf.constant`.
      batch_size: An integer. Queue capacities are `batch_size * 128` and
        `min_after_dequeue` is `batch_size * 32`.
      name: A name passed to both queue operations (optional).

    Returns:
      The result of `tf.train.shuffle_batch`.
    """
    capacity = batch_size * 128

    # one constant tensor per input array
    constants = list(map(tf.constant, data_list))

    # slice the constants into single examples
    example_queue = tf.train.slice_input_producer(constants, capacity=capacity, name=name)

    # shuffle and batch the sliced examples
    batch = tf.train.shuffle_batch(example_queue, batch_size,
                                   capacity=capacity,
                                   min_after_dequeue=batch_size * 32,
                                   name=name)
    return batch
def external(name, value, dtype=tf.sg_floatx):
    r"""Creates a variable whose initial value is `value`.

    Args:
      name: A string. The name of the new or existing variable.
      value: A constant value (or array) handed to `tf.constant`.
      dtype: The dtype given to `tf.constant`. (optional)

    Returns:
      The object returned by `tf.get_variable`.
    """
    x = tf.get_variable(
        name,
        initializer=tf.constant(value, dtype=dtype),
    )

    # register a summary unless the scope is reusing variables
    scope_is_reusing = tf.get_variable_scope().reuse
    if not scope_is_reusing:
        tf.sg_summary_param(x)
    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx, summary=True, regularizer=None, trainable=True):
    r"""Creates a variable initialised to a scaled identity matrix.

    For example,

    ```
    identity("identity", 3, 2)
    => [[2. 0. 0.]
        [0. 2. 0.]
        [0. 0. 2.]]
    ```

    Args:
      name: The name of the new variable (passed to `tf.get_variable`).
      dim: An int. The size of the first and second dimension of the output.
      scale: A Python scalar. The value on the diagonal (default 1).
      dtype: The dtype given to `tf.constant`.
      summary: If True, `tf.sg_summary_param` is called on the variable.
      regularizer: Forwarded unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      The object returned by `tf.get_variable`, initialised from a 2-D array.
    """
    eye_values = np.eye(dim) * scale
    initial_value = tf.constant(eye_values, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value,
                        regularizer=regularizer, trainable=trainable)

    # optional parameter summary
    if summary:
        tf.sg_summary_param(x)
    return x
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Creates a variable initialised to a scaled 2-D identity matrix.

    Args:
      name: A string. The name of the new or existing variable.
      dim: An int. The size of the first and second dimension of the output.
      scale: A Python scalar (optional). The value on the diagonal.
      dtype: The dtype given to `tf.constant`.

    Returns:
      The object returned by `tf.get_variable`; its initial value has `scale`
      on the diagonal and zeros elsewhere.
    """
    initial_value = tf.constant(np.eye(dim) * scale, dtype=dtype)
    x = tf.get_variable(name, initializer=initial_value)

    # register a summary unless the scope is reusing variables
    scope_is_reusing = tf.get_variable_scope().reuse
    if not scope_is_reusing:
        tf.sg_summary_param(x)
    return x
def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses):
    """Runs one step of the sentence-level loop and accumulates the loss.

    Reads the entries at `time` and `time + 1` from `x_sent`, encodes the
    first, decodes against the second, and adds the mean cross-entropy to
    `losses`.

    This function relies on names that are not defined in its body:
    `x_sent`, `Hp`, `reu_vars`, `emb_x`, `emb_y`, `num_blocks`, `crnn_cell`,
    `rnn_cell` and `self`. Presumably it is meant to be used as a closure
    (e.g. a `tf.while_loop` body) -- TODO confirm against the caller.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source; confirm which statements belong inside each `with` block.

    Args:
      time: Current step index; used to read from `x_sent`.
      subrec1: State tiled and concatenated into the first quasi-RNN layer.
      subrec2: State tiled and concatenated into the second quasi-RNN layer.
      rnn_state, rnn_h: State pair passed to `rnn_cell`.
      crnn_state, crnn_h: State pair passed to `crnn_cell`.
      losses: Running loss; combined with this step's loss via `tf.add_n`.

    Returns:
      A tuple `(time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
      crnn_h, losses)` with the updated values.
    """
    x = x_sent.read(time)
    y = x_sent.read(time + 1)  # (batch, sentlen) = (16, 200)

    # shift target by one step for training source
    y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]], 1)

    # NOTE(review): this expression is evaluated once in Python when the graph
    # is built (`or` is not a tensor op), not per loop iteration -- confirm it
    # produces the intended reuse flag.
    reuse_vars = time == tf.constant(0) or reu_vars

    # --------------------------   BYTENET ENCODER   --------------------------

    # embed table lookup
    enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
    # loop dilated conv block
    for i in range(num_blocks):
        enc = (enc.sg_res_block(
            size=5, rate=1, name="enc1_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                    size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                        size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                            size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars))

    # --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

    #quasi cnn layer ZFO  [batch * 3, t, dim2 ]
    conv = enc.sg_quasi_conv1d(is_enc=True, size=3, name="qconv_1", reuse_vars=reuse_vars)
    #attention layer
    # recurrent layer # 1 + final encoder hidden state
    subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    concat = conv.sg_concat(target=subrec1, axis=0)  # (batch*4, sentlen, latentdim)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_1", reuse_vars=reuse_vars)
    subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

    # quazi cnn ZFO (batch*3, sentlen, latentdim)
    conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_2", reuse_vars=reuse_vars)
    # (batch, sentlen-duplicated, latentdim)
    subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    # (batch*4, sentlen, latentdim)
    concat = conv.sg_concat(target=subrec2, axis=0)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_2", reuse_vars=reuse_vars)
    subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

    #residual block
    causal = False  # for encoder
    # NOTE(review): `Hp.hd / 2` is true division on Python 3 and may yield a
    # float `dim` -- confirm an integer is not required here.
    crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
        name='relu_0', act='relu', bn=(not causal), ln=causal).sg_conv1d_gpus(name="dimred_0", size=1, dev="/cpu:0", reuse=reuse_vars, dim=Hp.hd / 2, act='relu', bn=(not causal), ln=causal))

    # conv LSTM
    with tf.variable_scope("mem/clstm") as scp:
        (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h), size=5, reuse_vars=reuse_vars)
    # dimension recover and residual connection
    rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
                .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

    # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

    # pooling for lstm input
    # quazi cnn ZFO (batch*3, sentlen, latentdim)
    conv = rnn_input0.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_3", reuse_vars=reuse_vars)
    pool = conv.sg_quasi_rnn(is_enc=True, att=False, name="qrnn_3", reuse_vars=reuse_vars)
    rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

    # recurrent block
    with tf.variable_scope("mem/lstm") as scp:
        (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

    # residual sum of LSTM output and its input, repeated Hp.maxlen times along a new axis 1
    rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])

    # --------------------------   BYTENET DECODER   --------------------------

    # CNN decoder
    dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

    for i in range(num_blocks):
        dec = (dec.sg_res_block(
            size=3, rate=1, causal=True, name="dec1_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                size=3, rate=2, causal=True, name="dec2_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                    size=3, rate=4, causal=True, name="dec4_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                        size=3, rate=8, causal=True, name="dec8_%d" % (i), reuse_vars=reuse_vars).sg_res_block(
                            size=3, rate=16, causal=True, name="dec16_%d" % (i), reuse_vars=reuse_vars))

    # final fully convolution layer for softmax
    dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False, dev=self._dev, reuse=reuse_vars)

    # per-example cross entropy against the unshifted target, averaged and
    # added to the running loss
    ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
    cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

    losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

    return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses)
def tower_infer_dec(chars, scope, rnn_cell, dec_cell, word_emb, rnn_state, out_reuse_vars=False, dev='/cpu:0'):
    """Encodes one character sequence batch and decodes word ids from it.

    The encoder part always runs with variable reuse enabled (`reuse=True`
    for the embedding scope and `reuse_vars = True` for the encoder layers).
    The decoder then runs beam search; the greedy branch is currently
    unreachable because `greedy` is hard-coded to False.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source; confirm which statements belong inside each `with` block, since
    that determines variable names.

    Args:
      chars: Input tensor; it is reversed along axis 1 and looked up in the
        character embedding. Assumes shape (batch, c_maxlen) -- TODO confirm.
      scope: Not used in this function.
      rnn_cell: Callable invoked as `rnn_cell(input, state)` for the
        context LSTM step.
      dec_cell: Callable invoked as `dec_cell(input, state)` for each
        decoder step.
      word_emb: Passed as `emb=` to `tf.sg_emb` for the word embedding.
      rnn_state: Context state fed to `rnn_cell`; also indexed as
        `rnn_state[0]` / `rnn_state[1]` to seed the decoder.
      out_reuse_vars: Reuse flag applied to the decoder variable scopes.
      dev: Device string passed to `tf.device` and to the `dev=` arguments.

    Returns:
      A tuple `(ids, rnn_state)`. In the beam-search path `ids` is
      `final_ids[:, 0, :]` (presumably the best beam -- confirm against
      `beam_search.beam_search`); `rnn_state` is the updated context state.
    """
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char', voca_size=Hp.char_vs, dim=Hp.hd, dev=dev)
            emb_word = tf.sg_emb(name='emb_word', emb=word_emb, voca_size=Hp.word_vs, dim=300, dev=dev)

    print(chars)  # debug output of the input tensor
    ch = chars
    # every row is reversed over the same fixed length Hp.c_maxlen
    ch = tf.reverse_sequence(input=ch, seq_lengths=[Hp.c_maxlen] * Hp.batch_size, seq_dim=1)
    # `reuse_vars_enc` is assigned but not read again in this function
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), is_first=True, reuse_vars=reuse_vars, dev=dev).sg_res_block(
                size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                    size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                        size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                            size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars, dev=dev))
    byte_enc = enc

    # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

    with tf.variable_scope('quazi'):
        #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
        conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1", dev=dev, reuse_vars=reuse_vars)
        # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
        pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1", reuse_vars=reuse_vars, dev=dev)
        qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1], 'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    # despite the name, the two features are added element-wise, not concatenated
    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    # from here on the caller-supplied reuse flag governs the decoder scopes
    reuse_vars = out_reuse_vars

    # hard-coded switch: with False only the beam-search branch below runs
    greedy = False
    if greedy:
        dec_state = rnn_state
        dec_out = []
        # every sequence starts from token id 1
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c, h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=idx > 0 or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=True)
        return dec.sg_argmax(), rnn_state

    else:
        # ------------------ BEAM SEARCH --------------------
        # replicate both parts of the context state beam_size times along a new axis 1
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        # every sequence starts from token id 1
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            # Runs one decoder step per beam and returns the stacked outputs
            # together with the stacked new decoder states.
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0 or reuse_vars):
                    # only the most recent id of this beam is fed to the cell
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)

                dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        # `final_probs` is not used afterwards
        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn, dec_state, initial_ids, beam_size, Hp.w_maxlen - 1, Hp.word_vs, 3.5, eos_id=2)

        return final_ids[:, 0, :], rnn_state
inputs, seq_len, dtype=tf.float32) shape = tf.shape(inputs) batch_s, TF_max_timesteps = shape[0], shape[1] with tf.name_scope('outputs'): outputs = tf.reshape(outputs, [-1, num_hidden]) with tf.name_scope('weights'): W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1), name='weights') with tf.name_scope('biases'): b = tf.get_variable("b", initializer=tf.constant(0., shape=[num_classes])) with tf.name_scope('logits'): logits = tf.matmul(outputs, W) + b logits = tf.reshape(logits, [batch_s, -1, num_classes]) logits = tf.transpose(logits, (1, 0, 2), name="out/logits") with tf.name_scope('loss'): loss = tf.nn.ctc_loss(targets, logits, seq_len, ctc_merge_repeated=True, preprocess_collapse_repeated=True) with tf.name_scope('cost'): cost = tf.reduce_mean(loss) tf.summary.scalar("cost", cost) with tf.name_scope('optimizer'):
def log10(x):
    """Returns the base-10 logarithm of `x` as log(x) / log(10).

    The constant 10 is created with the dtype of `log(x)` before its own
    logarithm is taken.

    NOTE(review): this function references both `tenf` and `tf`; confirm
    that `tenf` is an intended module alias and not a typo.
    """
    log_of_x = tenf.log(x)
    ten = tf.constant(10, dtype=log_of_x.dtype)
    log_of_ten = tenf.log(ten)
    return log_of_x / log_of_ten
def tower_infer_enc(chars, scope, rnn_cell, dec_cell, word_emb, out_reuse_vars=False, dev='/cpu:0'):
    """Encodes the leading sentences of `chars` into a context RNN state.

    `chars` is cast to int32, transposed with `perm=[1, 0, 2]` and unstacked
    into a TensorArray; a `tf.while_loop` then feeds the entries one at a
    time through the encoder stack and `rnn_cell`. The loop runs while
    `time < statm_steps - 1`, where `statm_steps` is half of
    `tf.shape(chars)[1]`.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source; confirm which statements belong inside each `with` block, since
    that determines variable names.

    Args:
      chars: Rank-3 tensor (it is transposed with a 3-element permutation).
        Assumes (batch, sentences, c_maxlen) -- TODO confirm.
      scope: Not used in this function.
      rnn_cell: Provides `zero_state(batch_size, dtype)` and is invoked as
        `rnn_cell(input, state)`.
      dec_cell: Not used in this function.
      word_emb: Passed as `emb=` to `tf.sg_emb` for the word embedding.
      out_reuse_vars: Reuse flag applied to every variable scope here.
      dev: Device string passed to `tf.device` and to the `dev=` arguments.

    Returns:
      The `rnn_state` produced by the while loop.
    """
    # `out_rvars` is assigned but not read again in this function
    out_rvars = out_reuse_vars

    # make embedding matrix for source and target
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=out_reuse_vars):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char', voca_size=Hp.char_vs, dim=Hp.hd, dev=dev)
            # `emb_word` is created here but not read again in this function
            emb_word = tf.sg_emb(name='emb_word', emb=word_emb, voca_size=Hp.word_vs, dim=300, dev=dev)

    chars = tf.cast(chars, tf.int32)

    time = tf.constant(0)

    # move axis 1 to the front so that unstack yields one entry per index of axis 1
    inputs = tf.transpose(chars, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32, size=tf.shape(chars)[1], dynamic_size=True, clear_after_read=True)
    chars_sent = input_ta.unstack(inputs)  #each element is (batch, sentlen)

    resp_steps = tf.shape(chars)[1]  # number of sentences in paragraph
    statm_steps = resp_steps // 2

    rnn_state = rnn_cell.zero_state(
        Hp.batch_size, tf.float32)  #rnn_cell.rnn_state, rnn_cell.rnn_h

    # `maxdecode` is assigned but not read again in this function
    maxdecode = 3

    # -------------------------------------------- STATEMENT ENCODING -----------------------------------------------

    def rnn_cond_stat(time, rnn_state):
        # loop condition: continue while time < statm_steps - 1
        return tf.less(time, statm_steps - 1)

    def rnn_body_stat(time, rnn_state):
        # loop body: encode entry `time` and advance the context state
        ch = chars_sent.read(time)
        # every row is reversed over the same fixed length Hp.c_maxlen
        ch = tf.reverse_sequence(input=ch, seq_lengths=[Hp.c_maxlen] * Hp.batch_size, seq_dim=1)
        reuse_vars = out_reuse_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), is_first=True, reuse_vars=reuse_vars, dev=dev).sg_res_block(
                    size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                        size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                            size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                                size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars, dev=dev))
        byte_enc = enc

        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):
            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1", dev=dev, reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1", reuse_vars=reuse_vars, dev=dev)
            qpool_last = pool0[:, -1, :]

        # --------------------------   MAXPOOL along time dimension   --------------------------

        inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # --------------------------   HIGHWAY   --------------------------

        # despite the name, the two features are added element-wise, not concatenated
        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # --------------------------   CONTEXT LSTM  --------------------------

        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)

    loop_vars_stat = [time, rnn_state]

    time, rnn_state = tf.while_loop\
                      (rnn_cond_stat, rnn_body_stat, loop_vars_stat, swap_memory=False)

    return rnn_state
def __load_data(self, file_names, record_defaults, data_column, bucket_boundaries, field_delim=__DEFAULT_DELIM, skip_header_lines=0, num_epochs=None, shuffle=True):
    """Builds the queue-based input pipeline and returns padded batch tensors.

    Steps, in order: preprocess the files, read five string columns, create
    any missing lookup tables on `self`, split and index the tokens, pass
    them through `self.shuffle_queue`, and bucket them by sequence length.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source; confirm the nesting of the `if` blocks.

    Args:
      file_names: Sequence of input file names. It is copied with `[:]` and
        the first entry is used to derive the vocabulary file locations.
      record_defaults: Forwarded to `self._read_file`.
      data_column: Forwarded to `self.__generate_preprocessed_files`.
      bucket_boundaries: Forwarded to `bucket_by_sequence_length`.
      field_delim: Forwarded to preprocessing and to `self._read_file`.
      skip_header_lines: Forwarded to `self._read_file`.
      num_epochs: Forwarded to `tf.train.string_input_producer`.
      shuffle: Forwarded to `tf.train.string_input_producer`.

    Returns:
      A tuple `(padded_sent, padded_pos, padded_chunk, padded_capitals,
      padded_entities)`. All are reshaped to `[batch_size, -1]` except
      `padded_capitals`, which is reshaped to `[batch_size, -1, 1]`.
    """
    # keep the caller's names; `file_names` is replaced by the preprocessed files
    original_file_names = file_names[:]
    file_names = self.__generate_preprocessed_files(
        file_names, data_column, field_delim=field_delim)

    filename_queue = tf.train.string_input_producer(file_names,
                                                    num_epochs=num_epochs,
                                                    shuffle=shuffle)

    sentence, pos, chunks, capitals, entities = self._read_file(
        filename_queue, record_defaults, field_delim, skip_header_lines)

    voca_path, voca_suffix = BaseDataLoader._split_file_to_path_and_name(
        original_file_names[0]
    )  # TODO: will be break with multiple filenames
    voca_name = ConllPreprocessor.VOCABULARY_PREFIX + voca_suffix
    self.__vocabulary_file = voca_path + voca_name

    # load look up tables that maps words to ids
    if self.table is None:
        print('vocabulary table is None => creating it')
        main_voca_file = voca_path + voca_name
        if self._use_pretrained_emb:
            # the vocabulary comes back from preload_embeddings together with
            # the embedding matrix, so the table is built from a tensor
            self.pretrained_emb_matrix, vocabulary = self.preload_embeddings(
                embed_dim=self._embed_dim,
                file_name=self._pretrained_emb_file,
                train_vocabulary=main_voca_file,
                other_vocabularies=self._other_voca_files)
            tensor_vocabulary = tf.constant(vocabulary)
            self.table = tf.contrib.lookup.index_table_from_tensor(
                tensor_vocabulary,
                default_value=ConllPreprocessor.UNK_TOKEN_ID,
                num_oov_buckets=0)
        else:
            self.table = tf.contrib.lookup.index_table_from_file(
                vocabulary_file=main_voca_file,
                default_value=ConllPreprocessor.UNK_TOKEN_ID,
                num_oov_buckets=0)

    if self.table_pos is None:
        print('vocabulary table_pos is None => creating it')
        self.table_pos = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_POS + voca_suffix,
            num_oov_buckets=0)

    if self.table_chunk is None:
        print('vocabulary table_chunk is None => creating it')
        self.table_chunk = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_CHUNK + voca_suffix,
            num_oov_buckets=0)

    if self.table_entity is None:
        print('vocabulary table_entity is None => creating it')
        self.table_entity = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix,
            num_oov_buckets=0)

    # id -> string tables are only built when this loader serves test data
    if self._used_for_test_data:
        print('Reverse vocabulary is needed => creating it')
        self.reverse_table = tf.contrib.lookup.index_to_string_table_from_file(
            vocabulary_file=voca_path + voca_name)
        print('Reverse entity vocabulary is needed => creating it')
        self.reverse_table_entity = tf.contrib.lookup.index_to_string_table_from_file(
            vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix)

    # convert to tensor of strings
    split_sentence = tf.string_split([sentence], " ")
    split_pos = tf.string_split([pos], ' ')
    split_chunks = tf.string_split([chunks], ' ')
    split_capitals = tf.string_split([capitals], ' ')
    split_entities = tf.string_split([entities], ' ')

    # determine lengths of sequences
    line_number = split_sentence.indices[:, 0]
    line_position = split_sentence.indices[:, 1]
    # NOTE(review): `lengths` is derived from the record read before the
    # shuffle queue, while the tensors bucketed below come from
    # `shuffle_queue.dequeue()` -- confirm they describe the same record.
    lengths = (
        tf.segment_max(data=line_position, segment_ids=line_number) +
        1).sg_cast(dtype=tf.int32)

    # convert sparse to dense
    dense_sent = tf.sparse_tensor_to_dense(split_sentence, default_value="")
    dense_sent = self.table.lookup(dense_sent)

    dense_pos = tf.sparse_tensor_to_dense(split_pos, default_value="")
    dense_pos = self.table_pos.lookup(dense_pos)

    dense_chunks = tf.sparse_tensor_to_dense(split_chunks, default_value="")
    dense_chunks = self.table_chunk.lookup(dense_chunks)

    # capitals are parsed as numbers instead of going through a lookup table
    dense_capitals = tf.sparse_tensor_to_dense(split_capitals,
                                               default_value="")
    dense_capitals = tf.string_to_number(dense_capitals, out_type=tf.int64)

    dense_entities = tf.sparse_tensor_to_dense(split_entities,
                                               default_value="")
    dense_entities = self.table_entity.lookup(dense_entities)

    # get the enqueue op to pass to a coordinator to be run
    self.enqueue_op = self.shuffle_queue.enqueue([
        dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities
    ])
    dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities = self.shuffle_queue.dequeue(
    )

    # add queue to queue runner
    self.qr = tf.train.QueueRunner(self.shuffle_queue,
                                   [self.enqueue_op] * self.num_threads)
    tf.train.queue_runner.add_queue_runner(self.qr)

    # reshape from <unknown> shape into proper form after dequeue from random shuffle queue
    # this is needed so next queue can automatically infer the shape properly
    dense_sent = dense_sent.sg_reshape(shape=[1, -1])
    dense_pos = dense_pos.sg_reshape(shape=[1, -1])
    dense_chunks = dense_chunks.sg_reshape(shape=[1, -1])
    dense_capitals = dense_capitals.sg_reshape(shape=[1, -1])
    dense_entities = dense_entities.sg_reshape(shape=[1, -1])

    # the first element of the returned pair is discarded
    _, (padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities) = \
        tf.contrib.training.bucket_by_sequence_length(lengths,
                                                      [dense_sent, dense_pos, dense_chunks, dense_capitals,
                                                       dense_entities],
                                                      batch_size=self._batch_size,
                                                      bucket_boundaries=bucket_boundaries,
                                                      dynamic_pad=True,
                                                      capacity=self._capacity,
                                                      num_threads=self.num_threads, name='bucket_queue')

    # reshape shape into proper form after dequeue from bucket queue
    padded_sent = padded_sent.sg_reshape(shape=[self._batch_size, -1])
    padded_pos = padded_pos.sg_reshape(shape=[self._batch_size, -1])
    padded_chunk = padded_chunk.sg_reshape(shape=[self._batch_size, -1])
    padded_capitals = padded_capitals.sg_reshape(
        shape=[self._batch_size, -1, 1])
    padded_entities = padded_entities.sg_reshape(
        shape=[self._batch_size, -1])

    return padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities
def __load_data(self, file_names, record_defaults, data_column, bucket_boundaries, field_delim=__DEFAULT_DELIM, skip_header_lines=0, num_epochs=None, shuffle=True):
    """Builds the queue-based input pipeline and returns padded batch tensors.

    Steps, in order: preprocess the files, read five string columns, create
    any missing lookup tables on `self`, split and index the tokens, pass
    them through `self.shuffle_queue`, and bucket them by sequence length.

    NOTE(review): indentation was reconstructed from a whitespace-collapsed
    source; confirm the nesting of the `if` blocks.

    Args:
      file_names: Sequence of input file names. It is copied with `[:]` and
        the first entry is used to derive the vocabulary file locations.
      record_defaults: Forwarded to `self._read_file`.
      data_column: Forwarded to `self.__generate_preprocessed_files`.
      bucket_boundaries: Forwarded to `bucket_by_sequence_length`.
      field_delim: Forwarded to preprocessing and to `self._read_file`.
      skip_header_lines: Forwarded to `self._read_file`.
      num_epochs: Forwarded to `tf.train.string_input_producer`.
      shuffle: Forwarded to `tf.train.string_input_producer`.

    Returns:
      A tuple `(padded_sent, padded_pos, padded_chunk, padded_capitals,
      padded_entities)`. All are reshaped to `[batch_size, -1]` except
      `padded_capitals`, which is reshaped to `[batch_size, -1, 1]`.
    """
    # keep the caller's names; `file_names` is replaced by the preprocessed files
    original_file_names = file_names[:]
    file_names = self.__generate_preprocessed_files(file_names, data_column, field_delim=field_delim)

    filename_queue = tf.train.string_input_producer(
        file_names, num_epochs=num_epochs, shuffle=shuffle
    )

    sentence, pos, chunks, capitals, entities = self._read_file(filename_queue, record_defaults, field_delim,
                                                                skip_header_lines)

    voca_path, voca_suffix = BaseDataLoader._split_file_to_path_and_name(
        original_file_names[0])  # TODO: will be break with multiple filenames
    voca_name = ConllPreprocessor.VOCABULARY_PREFIX + voca_suffix
    self.__vocabulary_file = voca_path + voca_name

    # load look up tables that maps words to ids
    if self.table is None:
        print('vocabulary table is None => creating it')
        main_voca_file = voca_path + voca_name
        if self._use_pretrained_emb:
            # the vocabulary comes back from preload_embeddings together with
            # the embedding matrix, so the table is built from a tensor
            self.pretrained_emb_matrix, vocabulary = self.preload_embeddings(embed_dim=self._embed_dim,
                                                                             file_name=self._pretrained_emb_file,
                                                                             train_vocabulary=main_voca_file,
                                                                             other_vocabularies=self._other_voca_files)
            tensor_vocabulary = tf.constant(vocabulary)
            self.table = tf.contrib.lookup.index_table_from_tensor(tensor_vocabulary,
                                                                   default_value=ConllPreprocessor.UNK_TOKEN_ID,
                                                                   num_oov_buckets=0)
        else:
            self.table = tf.contrib.lookup.index_table_from_file(vocabulary_file=main_voca_file,
                                                                 default_value=ConllPreprocessor.UNK_TOKEN_ID,
                                                                 num_oov_buckets=0)

    if self.table_pos is None:
        print('vocabulary table_pos is None => creating it')
        self.table_pos = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_POS + voca_suffix,
            num_oov_buckets=0)

    if self.table_chunk is None:
        print('vocabulary table_chunk is None => creating it')
        self.table_chunk = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_CHUNK + voca_suffix,
            num_oov_buckets=0)

    if self.table_entity is None:
        print('vocabulary table_entity is None => creating it')
        self.table_entity = tf.contrib.lookup.index_table_from_file(
            vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix,
            num_oov_buckets=0)

    # id -> string tables are only built when this loader serves test data
    if self._used_for_test_data:
        print('Reverse vocabulary is needed => creating it')
        self.reverse_table = tf.contrib.lookup.index_to_string_table_from_file(
            vocabulary_file=voca_path + voca_name)
        print('Reverse entity vocabulary is needed => creating it')
        self.reverse_table_entity = tf.contrib.lookup.index_to_string_table_from_file(
            vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix)

    # convert to tensor of strings
    split_sentence = tf.string_split([sentence], " ")
    split_pos = tf.string_split([pos], ' ')
    split_chunks = tf.string_split([chunks], ' ')
    split_capitals = tf.string_split([capitals], ' ')
    split_entities = tf.string_split([entities], ' ')

    # determine lengths of sequences
    line_number = split_sentence.indices[:, 0]
    line_position = split_sentence.indices[:, 1]
    # NOTE(review): `lengths` is derived from the record read before the
    # shuffle queue, while the tensors bucketed below come from
    # `shuffle_queue.dequeue()` -- confirm they describe the same record.
    lengths = (tf.segment_max(data=line_position,
                              segment_ids=line_number) + 1).sg_cast(dtype=tf.int32)

    # convert sparse to dense
    dense_sent = tf.sparse_tensor_to_dense(split_sentence, default_value="")
    dense_sent = self.table.lookup(dense_sent)

    dense_pos = tf.sparse_tensor_to_dense(split_pos, default_value="")
    dense_pos = self.table_pos.lookup(dense_pos)

    dense_chunks = tf.sparse_tensor_to_dense(split_chunks, default_value="")
    dense_chunks = self.table_chunk.lookup(dense_chunks)

    # capitals are parsed as numbers instead of going through a lookup table
    dense_capitals = tf.sparse_tensor_to_dense(split_capitals, default_value="")
    dense_capitals = tf.string_to_number(dense_capitals, out_type=tf.int64)

    dense_entities = tf.sparse_tensor_to_dense(split_entities, default_value="")
    dense_entities = self.table_entity.lookup(dense_entities)

    # get the enqueue op to pass to a coordinator to be run
    self.enqueue_op = self.shuffle_queue.enqueue(
        [dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities])
    dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities = self.shuffle_queue.dequeue()

    # add queue to queue runner
    self.qr = tf.train.QueueRunner(self.shuffle_queue, [self.enqueue_op] * self.num_threads)
    tf.train.queue_runner.add_queue_runner(self.qr)

    # reshape from <unknown> shape into proper form after dequeue from random shuffle queue
    # this is needed so next queue can automatically infer the shape properly
    dense_sent = dense_sent.sg_reshape(shape=[1, -1])
    dense_pos = dense_pos.sg_reshape(shape=[1, -1])
    dense_chunks = dense_chunks.sg_reshape(shape=[1, -1])
    dense_capitals = dense_capitals.sg_reshape(shape=[1, -1])
    dense_entities = dense_entities.sg_reshape(shape=[1, -1])

    # the first element of the returned pair is discarded
    _, (padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities) = \
        tf.contrib.training.bucket_by_sequence_length(lengths,
                                                      [dense_sent, dense_pos, dense_chunks, dense_capitals,
                                                       dense_entities],
                                                      batch_size=self._batch_size,
                                                      bucket_boundaries=bucket_boundaries,
                                                      dynamic_pad=True,
                                                      capacity=self._capacity,
                                                      num_threads=self.num_threads, name='bucket_queue')

    # reshape shape into proper form after dequeue from bucket queue
    padded_sent = padded_sent.sg_reshape(shape=[self._batch_size, -1])
    padded_pos = padded_pos.sg_reshape(shape=[self._batch_size, -1])
    padded_chunk = padded_chunk.sg_reshape(shape=[self._batch_size, -1])
    padded_capitals = padded_capitals.sg_reshape(shape=[self._batch_size, -1, 1])
    padded_entities = padded_entities.sg_reshape(shape=[self._batch_size, -1])

    return padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities
def tower_loss_manyparams(xx, scope, reu_vars=False):
    r"""Sets up embeddings, recurrent state and the per-sentence loop functions.

    Each loop step encodes sentence `t` of a paragraph and adds the mean cross
    entropy of predicting sentence `t + 1` from it to the running loss.

    Args:
      xx: Tensor of token ids. It is cast to int32 and transposed with
        `perm=[1, 0, 2]`, so it must be rank 3; per the inline comments the
        layout is (batch, sentences, sentlen).
      scope: Not used by the code in this block.
      reu_vars: Passed as the `reuse` flag to every variable scope and layer
        created here.

    NOTE(review): this block defines `rnn_cond` / `rnn_body` and the initial
    loop state but contains neither a loop invocation nor a `return`; confirm
    where they are consumed.
    NOTE(review): `self`, `h` and `num_blocks` are free names here and must be
    supplied by an enclosing scope - confirm.
    NOTE(review): the original indentation was lost, so the extent of each
    `with` block below is reconstructed - confirm against the real file.
    """
    # embedding matrices for source and target, each (vocab_size, latent_dim)
    with tf.variable_scope('embatch_size', reuse=reu_vars):
        emb_x = tf.sg_emb(name='emb_x', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)

    xx = tf.cast(xx, tf.int32)
    time = tf.constant(0)
    losses_int = tf.constant(0.0)

    # move the sentence axis first so each TensorArray element is one sentence
    inputs = tf.transpose(xx, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32, size=1, dynamic_size=True,
                                            clear_after_read=False)
    x_sent = input_ta.unstack(inputs)  # each element is (batch, sentlen)

    n_steps = tf.shape(xx)[1]  # number of sentences in paragraph

    # generate first an unconditioned sentence
    n_input = Hp.hd
    subrec1_init = subrec_zero_state(Hp.batch_size, Hp.hd)
    subrec2_init = subrec_zero_state(Hp.batch_size, Hp.hd)

    with tf.variable_scope("mem", reuse=reu_vars) as scp:
        rnn_cell = LSTMCell(in_dim=h, dim=Hp.hd)
        crnn_cell = ConvLSTMCell(seqlen=Hp.maxlen, in_dim=n_input // 2, dim=Hp.hd // 2)
        (rnn_state_init, rnn_h_init) = rnn_cell.zero_state(Hp.batch_size)
        # (batch, sentlen, latentdim/2)
        (crnn_state_init, crnn_h_init) = crnn_cell.zero_state(Hp.batch_size)

    # dilation rates applied, in this order, inside every residual block group
    rates = (1, 2, 4, 8, 16)

    def rnn_cond(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses):
        """Loop condition: continue while a next sentence exists as target."""
        return tf.less(time, n_steps - 1)

    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses):
        """One loop step: encode sentence `time`, decode sentence `time + 1`.

        Returns the loop variables in the same order as the parameters, with
        `time` incremented and the step's mean cross entropy added to `losses`.
        """
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  # (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]], 1)

        # The original read `time == tf.constant(0) or reu_vars`. In TF 1.x
        # graph mode `==` between tensors is an identity check, which is always
        # False against a freshly created constant, so the expression reduced
        # to `reu_vars`; with tensor equality enabled the `or` would raise on a
        # symbolic tensor instead. Use the effective value directly.
        reuse_vars = reu_vars

        # -------------------------- BYTENET ENCODER --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(num_blocks):
            for rate in rates:
                enc = enc.sg_res_block(size=5, rate=rate,
                                       name="enc%d_%d" % (rate, i),
                                       reuse_vars=reuse_vars)

        # ---------------- QCNN + QPOOL ENCODER with attention #1 ----------------

        # quasi cnn layer ZFO [batch * 3, t, dim2]
        conv = enc.sg_quasi_conv1d(is_enc=True, size=3, name="qconv_1",
                                   reuse_vars=reuse_vars)
        # attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile(subrec1.sg_expand_dims(axis=1), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1, axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # ---------------- QCNN + QPOOL ENCODER with attention #2 ----------------

        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile(subrec2.sg_expand_dims(axis=1), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # ------ ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block ------

        # residual block
        causal = False  # for encoder
        pooled = pool[:Hp.batch_size, :, :]
        crnn_input = pooled.sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal), ln=causal)
        # `//` keeps the layer width an int under true division and matches
        # the `Hp.hd // 2` width the ConvLSTM cell was constructed with
        crnn_input = crnn_input.sg_conv1d_gpus(
            name="dimred_0", size=1, dev="/cpu:0", reuse=reuse_vars,
            dim=Hp.hd // 2, act='relu', bn=(not causal), ln=causal)

        # conv LSTM
        with tf.variable_scope("mem/clstm"):
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5, reuse_vars=reuse_vars)

        # dimension recover and residual connection
        rnn_input0 = pooled + crnn_h.sg_conv1d_gpus(
            name="diminc_0", size=1, dev="/cpu:0", dim=Hp.hd, reuse=reuse_vars,
            act='relu', bn=(not causal), ln=causal)

        # ---------------- QCNN + QPOOL ENCODER with attention #3 ----------------

        # pooling for lstm input
        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_3",
                                          reuse_vars=reuse_vars)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False, name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # -------- LSTM with RESIDUAL connection and MULTIPLICATIVE block --------

        # recurrent block
        with tf.variable_scope("mem/lstm"):
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        # residual sum, repeated along the time axis so it can be concatenated
        # with the embedded decoder input
        rnn_h2 = tf.tile((rnn_h + rnn_input).sg_expand_dims(axis=1),
                         [1, Hp.maxlen, 1])

        # -------------------------- BYTENET DECODER --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")
        for i in range(num_blocks):
            for rate in rates:
                dec = dec.sg_res_block(size=3, rate=rate, causal=True,
                                       name="dec%d_%d" % (rate, i),
                                       reuse_vars=reuse_vars)

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False,
                                 dev=self._dev, reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h,
                crnn_state, crnn_h, losses)