def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True, regularizer=None, trainable=True):
    r"""Creates a variable whose initial value is a scaled orthogonal ndarray.

    A standard-normal matrix of shape `(shape[0], prod(shape[1:]))` is
    factorized with a reduced SVD. The factor whose shape equals that
    flattened shape is reshaped to `shape`, multiplied by `scale`, and used
    as the initial value of the variable.

    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)

    Args:
      name: The name of the new variable.
      shape: A tuple/list of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two dimensions are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: Either float32 or float64.
      summary: If True, `tf.sg_summary_param` is called on the new variable.
      regularizer: A (Tensor -> Tensor or None) function, forwarded
        unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      A `Variable`.
    """
    matrix_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np.random.normal(0.0, 1.0, matrix_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)
    # keep whichever SVD factor has the flattened shape
    if left.shape == matrix_shape:
        basis = left
    else:
        basis = right
    basis = basis.reshape(shape)
    # wrap the scaled values in a constant and build the variable from it
    initial_value = tf.constant(scale * basis[:shape[0], :shape[1]], dtype=dtype)
    variable = tf.get_variable(name,
                               initializer=initial_value,
                               regularizer=regularizer,
                               trainable=trainable)
    # optional parameter summary
    if summary:
        tf.sg_summary_param(variable)
    return variable
def external(name, value, dtype=tf.sg_floatx, summary=True, regularizer=None, trainable=True):
    r"""Creates a variable whose initial contents are `value`.

    For example,

    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```

    Args:
      name: The name of the new variable.
      value: A constant value (or list) convertible to `dtype`.
      dtype: The element type handed to `tf.constant`.
      summary: If True, `tf.sg_summary_param` is called on the new variable.
      regularizer: A (Tensor -> Tensor or None) function, forwarded
        unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      A `Variable` initialized from `value` with type `dtype`.
    """
    # the constant is the initializer, so the variable takes its shape from `value`
    initial_value = tf.constant(value, dtype=dtype)
    variable = tf.get_variable(name,
                               initializer=initial_value,
                               regularizer=regularizer,
                               trainable=trainable)
    if not summary:
        return variable
    # summary requested
    tf.sg_summary_param(variable)
    return variable
Beispiel #3
0
def external(name, value, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable whose initial contents are `value`.

    For example,

    ```
    external("external", [3,3,1,2])
    => [3. 3. 1. 2.]
    ```

    Args:
      name: The name of the new variable.
      value: A constant value (or list) convertible to `dtype`.
      dtype: The element type handed to `tf.constant`.
      summary: If True, and the current variable scope is not in reuse mode,
        `tf.sg_summary_param` is called on the variable.

    Returns:
      A `Variable` initialized from `value` with type `dtype`.
    """
    initial_value = tf.constant(value, dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # summarize only when asked to and when the scope is not reusing variables
    scope_reuses = tf.get_variable_scope().reuse
    if summary and not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #4
0
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Creates a `dim` x `dim` variable initialized to `scale` times the identity.

    Args:
      name: The name of the new variable.
      dim: An int. Size passed to `np.eye`.
      scale: A Python scalar. The value placed on the diagonal.
      dtype: The element type handed to `tf.constant`.

    Returns:
      A 2-D `Variable`.
    """
    diagonal_values = np.eye(dim) * scale
    variable = tf.get_variable(name,
                               initializer=tf.constant(diagonal_values, dtype=dtype))
    # no summary when the current variable scope is in reuse mode
    if tf.get_variable_scope().reuse:
        return variable
    tf.sg_summary_param(variable)
    return variable
Beispiel #5
0
def identity(name, dim, scale=1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable whose initial value is a scaled identity matrix.

    For example,

    ```
    identity("identity", 3, 2) =>
    [[2. 0. 0.]
     [0. 2. 0.]
     [0. 0. 2.]]
    ```

    Args:
      name: The name of the new variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: A Python scalar. The value on the diagonal (default 1).
      dtype: The element type handed to `tf.constant`.
      summary: If True, and the current variable scope is not in reuse mode,
        `tf.sg_summary_param` is called on the variable.

    Returns:
      A 2-D `Variable`.
    """
    diagonal_values = np.eye(dim) * scale
    initial_value = tf.constant(diagonal_values, dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # summarize only when asked to and when the scope is not reusing variables
    scope_reuses = tf.get_variable_scope().reuse
    if summary and not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #6
0
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx, summary=True):
    r"""Creates a variable whose initial value is a scaled orthogonal ndarray.

    A standard-normal matrix of shape `(shape[0], prod(shape[1:]))` is
    factorized with a reduced SVD. The factor whose shape equals that
    flattened shape is reshaped to `shape` and multiplied by `scale`.

    See [Saxe et al. 2014.](http://arxiv.org/pdf/1312.6120.pdf)

    Args:
      name: The name of the new variable.
      shape: A tuple/list of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two dimensions are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: Either float32 or float64.
      summary: If True, and the current variable scope is not in reuse mode,
        `tf.sg_summary_param` is called on the variable.

    Returns:
      A `Variable`.
    """
    matrix_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np.random.normal(0.0, 1.0, matrix_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)
    # keep whichever SVD factor has the flattened shape
    if left.shape == matrix_shape:
        basis = left
    else:
        basis = right
    basis = basis.reshape(shape)
    # build the variable from the scaled values
    initial_value = tf.constant(scale * basis[:shape[0], :shape[1]], dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # summarize only when asked to and when the scope is not reusing variables
    scope_reuses = tf.get_variable_scope().reuse
    if summary and not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #7
0
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Creates a variable initialized with a scaled random orthogonal array.

    See Saxe et al. 2014 `http://arxiv.org/pdf/1312.6120.pdf`

    Args:
      name: A string. The name of the new or existing variable.
      shape: A list or tuple of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two dimensions are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: A float32 or float64.

    Returns:
      A `Tensor` variable.
    """
    matrix_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np.random.normal(0.0, 1.0, matrix_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)
    # keep whichever SVD factor has the flattened shape
    basis = (left if left.shape == matrix_shape else right).reshape(shape)
    # build the variable from the scaled values
    initial_value = tf.constant(scale * basis[:shape[0], :shape[1]], dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # no summary when the current variable scope is in reuse mode
    if tf.get_variable_scope().reuse:
        return variable
    tf.sg_summary_param(variable)
    return variable
Beispiel #8
0
def _data_to_tensor(data_list, batch_size, name=None):
    r"""Builds shuffled batch tensors from whole in-memory arrays.

    Each array is wrapped in a constant, the constants are sliced with
    `tf.train.slice_input_producer`, and the slices are batched with
    `tf.train.shuffle_batch`.

    Args:
      data_list: A list of ndarrays. Every array must have the same size in the first dimension.
      batch_size: An integer.
      name: A name for the operations (optional).

    Returns:
      The value returned by `tf.train.shuffle_batch`: a list of tensors of `batch_size`.
    """
    # one constant tensor per input array
    constants = []
    for array in data_list:
        constants.append(tf.constant(array))

    # both queues hold ten batches' worth of examples
    queue_capacity = batch_size * 10

    # per-example slices of the constants
    example_slices = tf.train.slice_input_producer(constants,
                                                   capacity=queue_capacity,
                                                   name=name)

    # shuffled batches of the slices
    batches = tf.train.shuffle_batch(example_slices,
                                     batch_size,
                                     capacity=queue_capacity,
                                     min_after_dequeue=batch_size * 1,
                                     name=name)
    return batches
Beispiel #9
0
def external(name, value, dtype=tf.sg_floatx):
    r"""Creates a variable whose initial contents are `value`.

    Args:
      name: The name of the new variable.
      value: A constant value (or list) convertible to `dtype`.
      dtype: The element type handed to `tf.constant`.

    Returns:
      The variable returned by `tf.get_variable`.
    """
    initial_value = tf.constant(value, dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # no summary when the current variable scope is in reuse mode
    if tf.get_variable_scope().reuse:
        return variable
    tf.sg_summary_param(variable)
    return variable
Beispiel #10
0
def orthogonal(name, shape, scale=1.1, dtype=tf.sg_floatx):
    r"""Creates a variable initialized with a scaled random orthogonal array.

    See Saxe et al. ( http://arxiv.org/pdf/1312.6120.pdf )

    Args:
      name: The name of the new variable.
      shape: A tuple/list of integers. Both `shape[0]` and `shape[1]` are
        indexed, so at least two dimensions are required.
      scale: A Python scalar multiplied into the orthogonal values.
      dtype: The element type handed to `tf.constant`.

    Returns:
      The variable returned by `tf.get_variable`.
    """
    matrix_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np.random.normal(0.0, 1.0, matrix_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)
    # keep whichever SVD factor has the flattened shape
    if left.shape == matrix_shape:
        basis = left
    else:
        basis = right
    basis = basis.reshape(shape)
    # build the variable from the scaled values
    scaled = scale * basis[:shape[0], :shape[1]]
    variable = tf.get_variable(name, initializer=tf.constant(scaled, dtype=dtype))
    # summary is skipped when the current variable scope is in reuse mode
    scope_reuses = tf.get_variable_scope().reuse
    if not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #11
0
def _data_to_tensor(data_list, batch_size, name=None):
    r"""Builds shuffled batch tensors from whole in-memory arrays.

    Each array is wrapped in a constant, the constants are sliced with
    `tf.train.slice_input_producer`, and the slices are batched with
    `tf.train.shuffle_batch`.

    Args:
      data_list: A list of arrays accepted by `tf.constant`.
      batch_size: An integer.
      name: A name for the operations (optional).

    Returns:
      The value returned by `tf.train.shuffle_batch`.
    """
    # one constant tensor per input array
    constants = []
    for array in data_list:
        constants.append(tf.constant(array))

    # both queues hold 128 batches' worth of examples
    queue_capacity = batch_size * 128

    # per-example slices of the constants
    example_slices = tf.train.slice_input_producer(constants,
                                                   capacity=queue_capacity,
                                                   name=name)

    # shuffled batches; at least 32 batches' worth stay queued after a dequeue
    batches = tf.train.shuffle_batch(example_slices,
                                     batch_size,
                                     capacity=queue_capacity,
                                     min_after_dequeue=batch_size * 32,
                                     name=name)
    return batches
Beispiel #12
0
def external(name, value, dtype=tf.sg_floatx):
    r"""Creates a variable whose initial contents are `value`.

    Args:
      name: A string. The name of the new or existing variable.
      value: A constant value (or array) convertible to `dtype`.
      dtype: The type of the elements of the resulting tensor. (optional)

    Returns:
      A `Tensor` variable.
    """
    initial_value = tf.constant(value, dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # summary is skipped when the current variable scope is in reuse mode
    scope_reuses = tf.get_variable_scope().reuse
    if not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #13
0
def identity(name,
             dim,
             scale=1,
             dtype=tf.sg_floatx,
             summary=True,
             regularizer=None,
             trainable=True):
    r"""Creates a variable whose initial value is a scaled identity matrix.

    For example,

    ```
    identity("identity", 3, 2) =>
    [[2. 0. 0.]
     [0. 2. 0.]
     [0. 0. 2.]]
    ```

    Args:
      name: The name of the new variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: A Python scalar. The value on the diagonal (default 1).
      dtype: The element type handed to `tf.constant`.
      summary: If True, `tf.sg_summary_param` is called on the new variable.
      regularizer: A (Tensor -> Tensor or None) function, forwarded
        unchanged to `tf.get_variable`.
      trainable: Forwarded unchanged to `tf.get_variable`. Default is True.

    Returns:
      A 2-D `Variable`.
    """
    diagonal_values = np.eye(dim) * scale
    initial_value = tf.constant(diagonal_values, dtype=dtype)
    variable = tf.get_variable(name,
                               initializer=initial_value,
                               regularizer=regularizer,
                               trainable=trainable)
    if not summary:
        return variable
    # summary requested
    tf.sg_summary_param(variable)
    return variable
Beispiel #14
0
def identity(name, dim, scale=1, dtype=tf.sg_floatx):
    r"""Creates a 2-D variable initialized to `scale` times the identity matrix.

    Args:
      name: A string. The name of the new or existing variable.
      dim: An int. The size of the first and second dimension of the output tensor.
      scale: A Python scalar (optional). The value on the diagonal.
      dtype: A tensor datatype.

    Returns:
      A 2-D tensor variable with the value of `scale` on the diagonal and zeros elsewhere.
    """
    diagonal_values = np.eye(dim) * scale
    initial_value = tf.constant(diagonal_values, dtype=dtype)
    variable = tf.get_variable(name, initializer=initial_value)
    # summary is skipped when the current variable scope is in reuse mode
    scope_reuses = tf.get_variable_scope().reuse
    if not scope_reuses:
        tf.sg_summary_param(variable)
    return variable
Beispiel #15
0
    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        """Builds one step over a pair of consecutive entries of `x_sent`.

        Reads entry `time` (source) and entry `time + 1` (target) from the
        enclosing-scope `x_sent`, encodes the source with dilated residual
        blocks, two attention quasi-RNN stages, a conv-LSTM cell and an LSTM
        cell, decodes the shifted target with causal residual blocks, and
        adds the mean cross-entropy of this step to `losses`.

        Relies on names from the enclosing scope: `x_sent`, `emb_x`, `emb_y`,
        `num_blocks`, `reu_vars`, `crnn_cell`, `rnn_cell`, `Hp` and `self`.
        Presumably used as the body of a `tf.while_loop` -- TODO confirm
        against the caller.

        Args:
          time: Index of the source entry in `x_sent`.
          subrec1: Tensor tiled to `Hp.maxlen` along axis 1 and concatenated
            with the output of "qconv_1"; replaced by the last time step of
            "qrnn_1".
          subrec2: Same role as `subrec1` for "qconv_2" / "qrnn_2".
          rnn_state: First element of the state pair passed to `rnn_cell`.
          rnn_h: Second element of the state pair passed to `rnn_cell`.
          crnn_state: First element of the state pair passed to `crnn_cell`.
          crnn_h: Second element of the state pair passed to `crnn_cell`.
          losses: Running loss; this step's mean cross-entropy is added to it.

        Returns:
          The tuple `(time + 1, subrec1, subrec2, rnn_state, rnn_h,
          crnn_state, crnn_h, losses)` holding the updated values.
        """
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  #   (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]],
                          1)
        # NOTE(review): `time` is presumably a Tensor here; whether `==`
        # yields a Python bool or a Tensor depends on the TF version, and
        # `or` needs a Python truth value -- verify this does what is intended.
        reuse_vars = time == tf.constant(0) or reu_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
        # loop dilated conv block (dilation rates 1, 2, 4, 8, 16 per block)
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))


        # --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

        # quasi cnn layer ZFO  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=3,
                                   name="qconv_1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1,
                                axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

        #residual block
        causal = False  # for encoder
        # NOTE(review): `Hp.hd / 2` is true division and yields a float under
        # Python 3 -- confirm `dim` accepts that.
        crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal),
            ln=causal).sg_conv1d_gpus(name="dimred_0",
                                      size=1,
                                      dev="/cpu:0",
                                      reuse=reuse_vars,
                                      dim=Hp.hd / 2,
                                      act='relu',
                                      bn=(not causal),
                                      ln=causal))

        # conv LSTM (the `scp` handle is not used afterwards)
        with tf.variable_scope("mem/clstm") as scp:
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5,
                                             reuse_vars=reuse_vars)
        # dimension recover and residual connection
        rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
                    .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

        # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

        # pooling for lstm input
        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True,
                                          size=2,
                                          name="qconv_3",
                                          reuse_vars=reuse_vars)
        # unlike stages #1 and #2, this stage is built with att=False
        pool = conv.sg_quasi_rnn(is_enc=True,
                                 att=False,
                                 name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

        # recurrent block (the `scp` handle is not used afterwards)
        with tf.variable_scope("mem/lstm") as scp:
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        # residual sum of LSTM output and its input, repeated along axis 1
        rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)),
                         [1, Hp.maxlen, 1])

        # --------------------------   BYTENET DECODER   --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

        # causal dilated conv blocks (dilation rates 1, 2, 4, 8, 16 per block)
        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        # cross-entropy against the unshifted target, averaged to a scalar
        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

        # accumulate this step's loss into the running total
        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
                crnn_h, losses)
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):
    """Builds the inference graph that encodes `chars` and decodes word ids.

    `chars` is reversed along axis 1, embedded and encoded with dilated
    residual blocks followed by a quasi-conv / quasi-RNN stage. The last
    quasi-RNN time step is summed with a max-pool of the encoder output over
    the time axis, passed through a highway layer and dropout, and fed to
    `rnn_cell`. The resulting state seeds a beam search (beam size 8) that
    steps `dec_cell`.

    A greedy decoding branch is also present but is disabled by the
    hard-coded `greedy = False`.

    Args:
      chars: Integer tensor of character ids; presumably
        (batch, Hp.c_maxlen) -- TODO confirm against caller.
      scope: Not referenced in this function body.
      rnn_cell: Callable `(input, state) -> (output, state)` used for the
        context LSTM step.
      dec_cell: Callable `(input, state) -> (output, state)` used for each
        decoding step.
      word_emb: Passed as `emb` to `tf.sg_emb` for the word embedding.
      rnn_state: State fed to `rnn_cell`; indexed with `[0]` and `[1]` when
        tiled for the beam search.
      out_reuse_vars: Reuse flag for the decoder variable scopes
        ('dec_lstm' and "out_conv").
      dev: Device string for the embeddings and layers.

    Returns:
      `(final_ids[:, 0, :], rnn_state)`: the first beam of the beam-search
      result (presumably the best-scoring one -- verify against
      `beam_search.beam_search`) and the updated context LSTM state.
    """

    with tf.device(dev):
        # embeddings are always fetched with reuse=True here
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    print(chars)
    ch = chars
    # reverse every row over its full length Hp.c_maxlen
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    # encoder variables are always reused; `reuse_vars_enc` is not read below
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block (dilation rates 1, 2, 4, 8, 16 per block)
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc
        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):

            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)

            # keep only the last time step
            qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    # element-wise sum (not a concatenation, despite the variable name)
    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    # NOTE(review): dropout is applied in this inference graph as well;
    # confirm Hp.keep_prob is set appropriately when decoding.
    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    # from here on the decoder scopes follow the caller-supplied reuse flag
    reuse_vars = out_reuse_vars

    # hard-coded switch: the greedy branch below never runs
    greedy = False
    if greedy:

        dec_state = rnn_state
        dec_out = []
        # every sequence starts from id 1
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            # the argmax id becomes the next step's input
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state

    else:

        # ------------------ BEAM SEARCH --------------------
        # replicate both parts of the context state once per beam
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        # every sequence starts from id 1
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            """Steps `dec_cell` once per beam and returns logits and new state.

            Uses the last id of each beam as the input word. Returns the
            per-beam logits stacked along axis 1 and an `LSTMStateTuple` of
            the per-beam states stacked the same way.
            """
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                # variables are created for the first beam only (unless
                # reuse_vars is set) and reused for the remaining beams
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        # NOTE(review): 3.5 is presumably the length-penalty parameter of
        # `beam_search.beam_search` -- confirm against its signature.
        # `final_probs` is not used afterwards.
        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)

        return final_ids[:, 0, :], rnn_state
Beispiel #17
0
                                           inputs,
                                           seq_len,
                                           dtype=tf.float32)
        shape = tf.shape(inputs)
        batch_s, TF_max_timesteps = shape[0], shape[1]

        with tf.name_scope('outputs'):
            outputs = tf.reshape(outputs, [-1, num_hidden])

        with tf.name_scope('weights'):
            W = tf.Variable(tf.truncated_normal([num_hidden, num_classes],
                                                stddev=0.1),
                            name='weights')
        with tf.name_scope('biases'):
            b = tf.get_variable("b",
                                initializer=tf.constant(0.,
                                                        shape=[num_classes]))

        with tf.name_scope('logits'):
            logits = tf.matmul(outputs, W) + b
            logits = tf.reshape(logits, [batch_s, -1, num_classes])
            logits = tf.transpose(logits, (1, 0, 2), name="out/logits")
        with tf.name_scope('loss'):
            loss = tf.nn.ctc_loss(targets,
                                  logits,
                                  seq_len,
                                  ctc_merge_repeated=True,
                                  preprocess_collapse_repeated=True)
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(loss)
        tf.summary.scalar("cost", cost)
        with tf.name_scope('optimizer'):
def log10(x):
    """Returns `log(x) / log(10)`, with the constant 10 cast to the dtype of `log(x)`.

    NOTE(review): the logarithm comes from `tenf` while the constant comes
    from `tf`; confirm both names are bound to the intended module.
    """
    log_of_x = tenf.log(x)
    # build the constant in the same dtype so the division does not mix types
    ten = tf.constant(10, dtype=log_of_x.dtype)
    log_of_ten = tenf.log(ten)
    return log_of_x / log_of_ten
def tower_infer_enc(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    out_reuse_vars=False,
                    dev='/cpu:0'):
    r"""Encodes the leading "statement" part of `chars` and returns the RNN state.

    The function creates the 'emb_char' and 'emb_word' embedding tables,
    unstacks `chars` (transposed with perm [1, 0, 2]) into a TensorArray and
    runs a `tf.while_loop` while `time < tf.shape(chars)[1] // 2 - 1`.
    Every iteration reads one element, reverses it along axis 1, sends it
    through `Hp.num_blocks` groups of residual blocks (rates 1, 2, 4, 8, 16),
    a quasi conv / quasi rnn pair, adds the last quasi-rnn step to a max-pool
    of the residual-block output, and feeds that sum through `highway`,
    dropout and `rnn_cell`.

    Args:
      chars: Tensor of character ids; it is cast to int32 and transposed with
        a three-element perm, so it has rank 3.
      scope: Not used by this function.
      rnn_cell: Cell that provides `zero_state(batch_size, dtype)` and is
        called as `rnn_cell(inputs, state)`.
      dec_cell: Not used by this function.
      word_emb: Passed as `emb` when the 'emb_word' table is created.
      out_reuse_vars: Reuse flag forwarded to the variable scopes and to the
        `sg_*` layers.
      dev: Device string used for `tf.device` and forwarded to the `sg_*` layers.

    Returns:
      The RNN state after the loop (the zero state if the body never runs).
    """
    out_rvars = out_reuse_vars  # kept from the original; never read afterwards

    # make embedding matrix for source and target
    with tf.device(dev), tf.variable_scope('embatch_size', reuse=out_reuse_vars):
        char_table = tf.sg_emb(name='emb_char',
                               voca_size=Hp.char_vs,
                               dim=Hp.hd,
                               dev=dev)
        # Only created here; the returned handle is not used again in this function.
        emb_word = tf.sg_emb(name='emb_word',
                             emb=word_emb,
                             voca_size=Hp.word_vs,
                             dim=300,
                             dev=dev)

    chars = tf.cast(chars, tf.int32)
    first_step = tf.constant(0)

    # Move axis 1 of `chars` to the front so it can be unstacked element by element.
    sent_major = tf.transpose(chars, perm=[1, 0, 2])
    array_size = tf.shape(chars)[1]
    sent_array = tensor_array_ops.TensorArray(tf.int32,
                                              size=array_size,
                                              dynamic_size=True,
                                              clear_after_read=True)
    # per the original note, each element is presumably (batch, sentlen)
    sentences = sent_array.unstack(sent_major)

    # The shape op is built a second time on purpose: the original graph has two.
    resp_steps = tf.shape(chars)[1]  # original note: number of sentences in paragraph
    statm_steps = resp_steps // 2

    start_state = rnn_cell.zero_state(Hp.batch_size, tf.float32)
    maxdecode = 3  # kept from the original; never read afterwards

    # (dilation rate, name template) of the residual blocks stacked in each group
    res_block_plan = ((1, "enc1_%d"), (2, "enc2_%d"), (4, "enc4_%d"),
                      (8, "enc8_%d"), (16, "enc16_%d"))

    # ---------------------------- STATEMENT ENCODING ----------------------------

    def keep_encoding(step, state):
        # Continue while the step counter is below statm_steps - 1.
        return tf.less(step, statm_steps - 1)

    def encode_sentence(step, state):
        sent = sentences.read(step)
        sent = tf.reverse_sequence(input=sent,
                                   seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                   seq_dim=1)
        reuse_vars = out_reuse_vars

        with tf.variable_scope('encoder'):
            # embedding lookup, then the dilated residual stack
            enc = sent.sg_lookup(emb=char_table)
            for i in range(Hp.num_blocks):
                for rate, name_fmt in res_block_plan:
                    if rate == 1:
                        # only the first block of every group is flagged is_first
                        enc = enc.sg_res_block(size=5,
                                               rate=rate,
                                               name=name_fmt % (i),
                                               is_first=True,
                                               reuse_vars=reuse_vars,
                                               dev=dev)
                    else:
                        enc = enc.sg_res_block(size=5,
                                               rate=rate,
                                               name=name_fmt % (i),
                                               reuse_vars=reuse_vars,
                                               dev=dev)

            with tf.variable_scope('quazi'):
                gates = enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
                pooled = gates.sg_quasi_rnn(is_enc=False,
                                            att=False,
                                            name="qrnn_1",
                                            reuse_vars=reuse_vars,
                                            dev=dev)
                # last position along axis 1 of the quasi-rnn output
                last_pooled = pooled[:, -1, :]

        # Max-pool the residual-block output with a window of Hp.c_maxlen along axis 2
        # of the expanded tensor, then drop axes 1 and 2 again.
        pool_input = tf.expand_dims(enc, 1)
        time_pooled = tf.nn.max_pool(pool_input, [1, 1, Hp.c_maxlen, 1],
                                     [1, 1, 1, 1], 'VALID')
        time_pooled = tf.squeeze(time_pooled, [1, 2])

        # The two summaries are added element-wise before the highway layer.
        merged = last_pooled + time_pooled
        with tf.variable_scope('highway', reuse=reuse_vars):
            lstm_in = highway(merged, merged.get_shape()[-1], num_layers=1)

        lstm_in = tf.nn.dropout(lstm_in, Hp.keep_prob)

        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            _, state = rnn_cell(lstm_in, state)

        return (step + 1, state)

    first_step, final_state = tf.while_loop(keep_encoding,
                                            encode_sentence,
                                            [first_step, start_state],
                                            swap_memory=False)

    return final_state
    def __load_data(self,
                    file_names,
                    record_defaults,
                    data_column,
                    bucket_boundaries,
                    field_delim=__DEFAULT_DELIM,
                    skip_header_lines=0,
                    num_epochs=None,
                    shuffle=True):
        r"""Builds the queue-based input pipeline and returns padded batch tensors.

        The files are preprocessed, read through a filename queue into five
        string fields (sentence, pos, chunks, capitals, entities), split on
        spaces, mapped to ids through lookup tables (capitals are parsed as
        numbers), pushed through `self.shuffle_queue` and finally batched with
        `bucket_by_sequence_length`.

        Side effects: sets `self.__vocabulary_file`, `self.enqueue_op` and
        `self.qr`, registers a queue runner, and creates any lookup table
        attribute that is still None (plus the reverse tables when
        `self._used_for_test_data` is set).

        Args:
          file_names: Input files; only the first one is used to derive the
            vocabulary file locations.
          record_defaults: Forwarded to `self._read_file`.
          data_column: Forwarded to `self.__generate_preprocessed_files`.
          bucket_boundaries: Forwarded to `bucket_by_sequence_length`.
          field_delim: Delimiter forwarded to preprocessing and `self._read_file`.
          skip_header_lines: Forwarded to `self._read_file`.
          num_epochs: Forwarded to `tf.train.string_input_producer`.
          shuffle: Forwarded to `tf.train.string_input_producer`.

        Returns:
          A tuple (padded_sent, padded_pos, padded_chunk, padded_capitals,
          padded_entities), each reshaped to [batch_size, -1] except
          padded_capitals, which is reshaped to [batch_size, -1, 1].
        """
        # Copy the list before preprocessing; the first original name is needed below.
        source_files = file_names[:]
        preprocessed_files = self.__generate_preprocessed_files(
            file_names, data_column, field_delim=field_delim)

        file_queue = tf.train.string_input_producer(preprocessed_files,
                                                    num_epochs=num_epochs,
                                                    shuffle=shuffle)

        sentence, pos, chunks, capitals, entities = self._read_file(
            file_queue, record_defaults, field_delim, skip_header_lines)

        # TODO: will be break with multiple filenames
        first_file = source_files[0]
        voca_path, voca_suffix = BaseDataLoader._split_file_to_path_and_name(
            first_file)
        voca_name = ConllPreprocessor.VOCABULARY_PREFIX + voca_suffix
        self.__vocabulary_file = voca_path + voca_name

        def tag_table(prefix):
            # id table read from the vocabulary file "<path><prefix><suffix>"
            return tf.contrib.lookup.index_table_from_file(
                vocabulary_file=voca_path + prefix + voca_suffix,
                num_oov_buckets=0)

        # load look up tables that maps words to ids
        if self.table is None:
            print('vocabulary table is None => creating it')
            main_voca_file = voca_path + voca_name

            if not self._use_pretrained_emb:
                self.table = tf.contrib.lookup.index_table_from_file(
                    vocabulary_file=main_voca_file,
                    default_value=ConllPreprocessor.UNK_TOKEN_ID,
                    num_oov_buckets=0)
            else:
                # The vocabulary comes from the embedding preload instead of the file.
                self.pretrained_emb_matrix, vocabulary = self.preload_embeddings(
                    embed_dim=self._embed_dim,
                    file_name=self._pretrained_emb_file,
                    train_vocabulary=main_voca_file,
                    other_vocabularies=self._other_voca_files)
                vocabulary_tensor = tf.constant(vocabulary)
                self.table = tf.contrib.lookup.index_table_from_tensor(
                    vocabulary_tensor,
                    default_value=ConllPreprocessor.UNK_TOKEN_ID,
                    num_oov_buckets=0)

        if self.table_pos is None:
            print('vocabulary table_pos is None => creating it')
            self.table_pos = tag_table(self._TABLE_POS)

        if self.table_chunk is None:
            print('vocabulary table_chunk is None => creating it')
            self.table_chunk = tag_table(self._TABLE_CHUNK)

        if self.table_entity is None:
            print('vocabulary table_entity is None => creating it')
            self.table_entity = tag_table(self._TABLE_ENTITY)

        if self._used_for_test_data:
            print('Reverse vocabulary is needed => creating it')
            self.reverse_table = tf.contrib.lookup.index_to_string_table_from_file(
                vocabulary_file=voca_path + voca_name)
            print('Reverse entity vocabulary is needed => creating it')
            self.reverse_table_entity = tf.contrib.lookup.index_to_string_table_from_file(
                vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix)

        # convert to tensor of strings (every field is split on a single space)
        split_sentence, split_pos, split_chunks, split_capitals, split_entities = [
            tf.string_split([field], ' ')
            for field in (sentence, pos, chunks, capitals, entities)
        ]

        # determine lengths of sequences: largest token position per line, plus one
        row_ids = split_sentence.indices[:, 0]
        token_positions = split_sentence.indices[:, 1]
        last_positions = tf.segment_max(data=token_positions,
                                        segment_ids=row_ids)
        lengths = (last_positions + 1).sg_cast(dtype=tf.int32)

        def to_dense(sparse_strings):
            # missing cells of the sparse split result become empty strings
            return tf.sparse_tensor_to_dense(sparse_strings, default_value="")

        # convert sparse to dense, then map the tokens to ids / numbers
        dense_sent = self.table.lookup(to_dense(split_sentence))
        dense_pos = self.table_pos.lookup(to_dense(split_pos))
        dense_chunks = self.table_chunk.lookup(to_dense(split_chunks))
        dense_capitals = tf.string_to_number(to_dense(split_capitals),
                                             out_type=tf.int64)
        dense_entities = self.table_entity.lookup(to_dense(split_entities))

        # get the enqueue op to pass to a coordinator to be run
        queue_items = [
            dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities
        ]
        self.enqueue_op = self.shuffle_queue.enqueue(queue_items)
        dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities = \
            self.shuffle_queue.dequeue()

        # add queue to queue runner (one enqueue op per thread)
        enqueue_ops = [self.enqueue_op] * self.num_threads
        self.qr = tf.train.QueueRunner(self.shuffle_queue, enqueue_ops)
        tf.train.queue_runner.add_queue_runner(self.qr)

        # reshape from <unknown> shape into proper form after dequeue from random shuffle queue
        # this is needed so next queue can automatically infer the shape properly
        row_tensors = [
            tensor.sg_reshape(shape=[1, -1])
            for tensor in (dense_sent, dense_pos, dense_chunks, dense_capitals,
                           dense_entities)
        ]

        _, padded = tf.contrib.training.bucket_by_sequence_length(
            lengths,
            row_tensors,
            batch_size=self._batch_size,
            bucket_boundaries=bucket_boundaries,
            dynamic_pad=True,
            capacity=self._capacity,
            num_threads=self.num_threads,
            name='bucket_queue')
        padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities = padded

        # reshape shape into proper form after dequeue from bucket queue
        return (
            padded_sent.sg_reshape(shape=[self._batch_size, -1]),
            padded_pos.sg_reshape(shape=[self._batch_size, -1]),
            padded_chunk.sg_reshape(shape=[self._batch_size, -1]),
            padded_capitals.sg_reshape(shape=[self._batch_size, -1, 1]),
            padded_entities.sg_reshape(shape=[self._batch_size, -1]),
        )
    def __load_data(self,
                    file_names,
                    record_defaults,
                    data_column,
                    bucket_boundaries,
                    field_delim=__DEFAULT_DELIM,
                    skip_header_lines=0,
                    num_epochs=None,
                    shuffle=True):
        r"""Builds the queue-based input pipeline and returns padded batch tensors.

        The files are preprocessed, read through a filename queue into five
        string fields (sentence, pos, chunks, capitals, entities), split on
        spaces, mapped to ids through lookup tables (capitals are parsed as
        numbers), pushed through `self.shuffle_queue` and finally batched with
        `bucket_by_sequence_length`.

        Side effects: sets `self.__vocabulary_file`, `self.enqueue_op` and
        `self.qr`, registers a queue runner, and creates any lookup table
        attribute that is still None (plus the reverse tables when
        `self._used_for_test_data` is set).

        Args:
          file_names: Input files; only the first one is used to derive the
            vocabulary file locations.
          record_defaults: Forwarded to `self._read_file`.
          data_column: Forwarded to `self.__generate_preprocessed_files`.
          bucket_boundaries: Forwarded to `bucket_by_sequence_length`.
          field_delim: Delimiter forwarded to preprocessing and `self._read_file`.
          skip_header_lines: Forwarded to `self._read_file`.
          num_epochs: Forwarded to `tf.train.string_input_producer`.
          shuffle: Forwarded to `tf.train.string_input_producer`.

        Returns:
          A tuple (padded_sent, padded_pos, padded_chunk, padded_capitals,
          padded_entities), each reshaped to [batch_size, -1] except
          padded_capitals, which is reshaped to [batch_size, -1, 1].
        """
        # Copy the list before preprocessing; the first original name is needed below.
        source_files = file_names[:]
        preprocessed_files = self.__generate_preprocessed_files(
            file_names, data_column, field_delim=field_delim)

        file_queue = tf.train.string_input_producer(preprocessed_files,
                                                    num_epochs=num_epochs,
                                                    shuffle=shuffle)

        sentence, pos, chunks, capitals, entities = self._read_file(
            file_queue, record_defaults, field_delim, skip_header_lines)

        # TODO: will be break with multiple filenames
        first_file = source_files[0]
        voca_path, voca_suffix = BaseDataLoader._split_file_to_path_and_name(
            first_file)
        voca_name = ConllPreprocessor.VOCABULARY_PREFIX + voca_suffix
        self.__vocabulary_file = voca_path + voca_name

        def tag_table(prefix):
            # id table read from the vocabulary file "<path><prefix><suffix>"
            return tf.contrib.lookup.index_table_from_file(
                vocabulary_file=voca_path + prefix + voca_suffix,
                num_oov_buckets=0)

        # load look up tables that maps words to ids
        if self.table is None:
            print('vocabulary table is None => creating it')
            main_voca_file = voca_path + voca_name

            if not self._use_pretrained_emb:
                self.table = tf.contrib.lookup.index_table_from_file(
                    vocabulary_file=main_voca_file,
                    default_value=ConllPreprocessor.UNK_TOKEN_ID,
                    num_oov_buckets=0)
            else:
                # The vocabulary comes from the embedding preload instead of the file.
                self.pretrained_emb_matrix, vocabulary = self.preload_embeddings(
                    embed_dim=self._embed_dim,
                    file_name=self._pretrained_emb_file,
                    train_vocabulary=main_voca_file,
                    other_vocabularies=self._other_voca_files)
                vocabulary_tensor = tf.constant(vocabulary)
                self.table = tf.contrib.lookup.index_table_from_tensor(
                    vocabulary_tensor,
                    default_value=ConllPreprocessor.UNK_TOKEN_ID,
                    num_oov_buckets=0)

        if self.table_pos is None:
            print('vocabulary table_pos is None => creating it')
            self.table_pos = tag_table(self._TABLE_POS)

        if self.table_chunk is None:
            print('vocabulary table_chunk is None => creating it')
            self.table_chunk = tag_table(self._TABLE_CHUNK)

        if self.table_entity is None:
            print('vocabulary table_entity is None => creating it')
            self.table_entity = tag_table(self._TABLE_ENTITY)

        if self._used_for_test_data:
            print('Reverse vocabulary is needed => creating it')
            self.reverse_table = tf.contrib.lookup.index_to_string_table_from_file(
                vocabulary_file=voca_path + voca_name)
            print('Reverse entity vocabulary is needed => creating it')
            self.reverse_table_entity = tf.contrib.lookup.index_to_string_table_from_file(
                vocabulary_file=voca_path + self._TABLE_ENTITY + voca_suffix)

        # convert to tensor of strings (every field is split on a single space)
        split_sentence, split_pos, split_chunks, split_capitals, split_entities = [
            tf.string_split([field], ' ')
            for field in (sentence, pos, chunks, capitals, entities)
        ]

        # determine lengths of sequences: largest token position per line, plus one
        row_ids = split_sentence.indices[:, 0]
        token_positions = split_sentence.indices[:, 1]
        last_positions = tf.segment_max(data=token_positions,
                                        segment_ids=row_ids)
        lengths = (last_positions + 1).sg_cast(dtype=tf.int32)

        def to_dense(sparse_strings):
            # missing cells of the sparse split result become empty strings
            return tf.sparse_tensor_to_dense(sparse_strings, default_value="")

        # convert sparse to dense, then map the tokens to ids / numbers
        dense_sent = self.table.lookup(to_dense(split_sentence))
        dense_pos = self.table_pos.lookup(to_dense(split_pos))
        dense_chunks = self.table_chunk.lookup(to_dense(split_chunks))
        dense_capitals = tf.string_to_number(to_dense(split_capitals),
                                             out_type=tf.int64)
        dense_entities = self.table_entity.lookup(to_dense(split_entities))

        # get the enqueue op to pass to a coordinator to be run
        queue_items = [
            dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities
        ]
        self.enqueue_op = self.shuffle_queue.enqueue(queue_items)
        dense_sent, dense_pos, dense_chunks, dense_capitals, dense_entities = \
            self.shuffle_queue.dequeue()

        # add queue to queue runner (one enqueue op per thread)
        enqueue_ops = [self.enqueue_op] * self.num_threads
        self.qr = tf.train.QueueRunner(self.shuffle_queue, enqueue_ops)
        tf.train.queue_runner.add_queue_runner(self.qr)

        # reshape from <unknown> shape into proper form after dequeue from random shuffle queue
        # this is needed so next queue can automatically infer the shape properly
        row_tensors = [
            tensor.sg_reshape(shape=[1, -1])
            for tensor in (dense_sent, dense_pos, dense_chunks, dense_capitals,
                           dense_entities)
        ]

        _, padded = tf.contrib.training.bucket_by_sequence_length(
            lengths,
            row_tensors,
            batch_size=self._batch_size,
            bucket_boundaries=bucket_boundaries,
            dynamic_pad=True,
            capacity=self._capacity,
            num_threads=self.num_threads,
            name='bucket_queue')
        padded_sent, padded_pos, padded_chunk, padded_capitals, padded_entities = padded

        # reshape shape into proper form after dequeue from bucket queue
        return (
            padded_sent.sg_reshape(shape=[self._batch_size, -1]),
            padded_pos.sg_reshape(shape=[self._batch_size, -1]),
            padded_chunk.sg_reshape(shape=[self._batch_size, -1]),
            padded_capitals.sg_reshape(shape=[self._batch_size, -1, 1]),
            padded_entities.sg_reshape(shape=[self._batch_size, -1]),
        )
Beispiel #22
0
def tower_loss_manyparams(xx, scope, reu_vars=False):
    # make embedding matrix for source and target
    reu_vars = reu_vars
    with tf.variable_scope('embatch_size', reuse=reu_vars):
        # (vocab_size, latent_dim)
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)

    xx = tf.cast(xx, tf.int32)

    time = tf.constant(0)
    losses_int = tf.constant(0.0)
    inputs = tf.transpose(xx, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32,
                                            size=1,
                                            dynamic_size=True,
                                            clear_after_read=False)
    x_sent = input_ta.unstack(inputs)  #each element is (batch, sentlen)

    n_steps = tf.shape(xx)[1]  # number of sentences in paragraph

    # generate first an unconditioned sentence
    n_input = Hp.hd
    subrec1_init = subrec_zero_state(Hp.batch_size, Hp.hd)
    subrec2_init = subrec_zero_state(Hp.batch_size, Hp.hd)

    with tf.variable_scope("mem", reuse=reu_vars) as scp:
        rnn_cell = LSTMCell(in_dim=h, dim=Hp.hd)
        crnn_cell = ConvLSTMCell(seqlen=Hp.maxlen,
                                 in_dim=n_input // 2,
                                 dim=Hp.hd // 2)

    (rnn_state_init, rnn_h_init) = rnn_cell.zero_state(Hp.batch_size)

    #   (batch, sentlen, latentdim/2)
    (crnn_state_init, crnn_h_init) = crnn_cell.zero_state(Hp.batch_size)

    def rnn_cond(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        return tf.less(time, n_steps - 1)

    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  #   (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]],
                          1)
        reuse_vars = time == tf.constant(0) or reu_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))


# --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

#quasi cnn layer ZFO  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=3,
                                   name="qconv_1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1,
                                axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

        #residual block
        causal = False  # for encoder
        crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal),
            ln=causal).sg_conv1d_gpus(name="dimred_0",
                                      size=1,
                                      dev="/cpu:0",
                                      reuse=reuse_vars,
                                      dim=Hp.hd / 2,
                                      act='relu',
                                      bn=(not causal),
                                      ln=causal))

        # conv LSTM
        with tf.variable_scope("mem/clstm") as scp:
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5,
                                             reuse_vars=reuse_vars)
        # dimension recover and residual connection
        rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
                    .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

        # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

        # pooling for lstm input
        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True,
                                          size=2,
                                          name="qconv_3",
                                          reuse_vars=reuse_vars)
        pool = conv.sg_quasi_rnn(is_enc=True,
                                 att=False,
                                 name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

        # recurrent block
        with tf.variable_scope("mem/lstm") as scp:
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)),
                         [1, Hp.maxlen, 1])

        # --------------------------   BYTENET DECODER   --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
                crnn_h, losses)