Example #1
def sg_reverse_seq(tensor, opt):
    r"""Reverses variable length slices.

    This function first computes each row's sequence length by counting
      non-zero elements, then applies the plain TensorFlow function
      tf.reverse_sequence.

    For example,
    
    ```
    tensor = [[1, 2, 3, 0, 0], [4, 5, 0, 0, 0]]
    tensor.sg_reverse_seq()
    => [[3 2 1 0 0]
        [5 4 0 0 0]]
    ```
        
    Args:
      tensor: A 2-D `Tensor` (automatically given by chain).
      opt:
        dim: Dimension to reverse. Default is 1.
        name: If provided, it replaces the current tensor's name.

    Returns:
      A `Tensor` with the same shape and type as `tensor`.
    """
    # default sequence dimension
    opt += tf.sg_opt(dim=1)
    seq_len = tf.not_equal(tensor,
                           tf.zeros_like(tensor)).sg_int().sg_sum(dims=opt.dim)
    return tf.reverse_sequence(tensor, seq_len, opt.dim, name=opt.name)
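
For reference, the chained call in the docstring can be reproduced without sugartensor. The block below is a minimal, hypothetical TF 1.x sketch of the same idea (counting non-zero entries per row as the sequence length and passing that to tf.reverse_sequence); the constant input and printed result follow the docstring example.

```
import tensorflow as tf

x = tf.constant([[1, 2, 3, 0, 0],
                 [4, 5, 0, 0, 0]])
# per-row sequence length = number of non-zero entries
seq_len = tf.reduce_sum(tf.cast(tf.not_equal(x, 0), tf.int32), axis=1)
rev = tf.reverse_sequence(x, seq_len, seq_axis=1)

with tf.Session() as sess:
    print(sess.run(rev))  # [[3 2 1 0 0], [5 4 0 0 0]]
```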
Example #2
File: train.py Project: liean/quasi-rnn
def sg_quasi_rnn(tensor, opt):
    # Split
    if opt.att:
        H, Z, F, O = tf.split(tensor, 4, axis=0)  # (16, 150, 320) for all
    else:
        Z, F, O = tf.split(tensor, 3, axis=0)  # (16, 150, 320) for all

    # step func
    def step(z, f, o, c):
        '''
        Runs fo-pooling at each time step
        '''
        c = f * c + (1 - f) * z

        if opt.att:  # attention
            a = tf.nn.softmax(tf.einsum("ijk,ik->ij", H,
                                        c))  # alpha. (16, 150)
            k = (a.sg_expand_dims() * H).sg_sum(
                axis=1)  # attentional sum. (16, 320)
            h = o * (k.sg_dense(act="linear") + \
                     c.sg_dense(act="linear"))
        else:
            h = o * c

        return h, c  # hidden states, (new) cell memories

    # Do rnn loop
    c, hs = 0, []
    timesteps = tensor.get_shape().as_list()[1]
    for t in range(timesteps):
        z = Z[:, t, :]  # (16, 320)
        f = F[:, t, :]  # (16, 320)
        o = O[:, t, :]  # (16, 320)

        # apply step function
        h, c = step(z, f, o, c)  # (16, 320), (16, 320)

        # save result
        hs.append(h.sg_expand_dims(axis=1))

    # Concat to return
    H = tf.concat(hs, 1)  # (16, 150, 320)
    seqlen = tf.to_int32(
        tf.reduce_sum(tf.sign(tf.abs(tf.reduce_sum(H, axis=-1))),
                      1))  # (16,) int32 sequence lengths
    h = tf.reverse_sequence(input=H, seq_lengths=seqlen,
                            seq_dim=1)[:, 0, :]  # last hidden state vector

    if opt.is_enc:
        H_z = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        H_f = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        H_o = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        concatenated = tf.concat([H, H_z, H_f, H_o], 0)  # (16*4, 150, 320)
        return concatenated
    else:
        return H  # (16, 150, 320)
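
At the heart of the loop above is the fo-pooling recurrence c_t = f_t * c_{t-1} + (1 - f_t) * z_t with h_t = o_t * c_t. The following is a hedged NumPy sketch of the non-attention path over toy shapes (batch=2, time=3, dim=4), purely to illustrate the recurrence; the real code operates on the (16, 150, 320) tensors noted in the comments.

```
import numpy as np

rng = np.random.RandomState(0)
Z = rng.rand(2, 3, 4)   # candidate values
F = rng.rand(2, 3, 4)   # forget gates in (0, 1)
O = rng.rand(2, 3, 4)   # output gates in (0, 1)

c, hs = np.zeros((2, 4)), []
for t in range(Z.shape[1]):
    c = F[:, t] * c + (1 - F[:, t]) * Z[:, t]   # cell update (fo-pooling)
    hs.append(O[:, t] * c)                      # hidden state h = o * c
H = np.stack(hs, axis=1)                        # (2, 3, 4), analogous to tf.concat(hs, 1)
```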
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):

    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    print(chars)
    ch = chars
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc
        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):

            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)

            qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    concat = qpool_last + maxpool  # element-wise sum of the two pooled encodings
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    reuse_vars = out_reuse_vars

    greedy = False
    if greedy:

        dec_state = rnn_state
        dec_out = []
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state

    else:

        # ------------------ BEAM SEARCH --------------------
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)

        return final_ids[:, 0, :], rnn_state
    def rnn_body_stat(time, rnn_state):
        ch = chars_sent.read(time)
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = out_reuse_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5,
                                        rate=1,
                                        name="enc1_%d" % (i),
                                        is_first=True,
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=2,
                                            name="enc2_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=4,
                                                name="enc4_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=8,
                                                    name="enc8_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev).sg_res_block(
                                                        size=5,
                                                        rate=16,
                                                        name="enc16_%d" % (i),
                                                        reuse_vars=reuse_vars,
                                                        dev=dev))
            byte_enc = enc
            # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

            with tf.variable_scope('quazi'):

                #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
                conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                                size=4,
                                                name="qconv_1",
                                                dev=dev,
                                                reuse_vars=reuse_vars)
                # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
                pool0 = conv.sg_quasi_rnn(is_enc=False,
                                          att=False,
                                          name="qrnn_1",
                                          reuse_vars=reuse_vars,
                                          dev=dev)

                qpool_last = pool0[:, -1, :]

        # --------------------------   MAXPOOL along time dimension   --------------------------

        inpt_maxpl = tf.expand_dims(byte_enc,
                                    1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # --------------------------   HIGHWAY   --------------------------

        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # --------------------------   CONTEXT LSTM  --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)
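
Note that the highway(...) helper called in both code paths above is not part of this listing. The block below is a hypothetical, minimal highway layer (t * H(x) + (1 - t) * x) written in plain TF 1.x with the same call signature, shown only to make the listing self-explanatory; the project's actual implementation may differ.

```
import tensorflow as tf

def highway(x, size, num_layers=1):
    # hypothetical stand-in for the project's highway() helper
    out = x
    for i in range(num_layers):
        h = tf.layers.dense(out, int(size), activation=tf.nn.relu,
                            name='hw_h_%d' % i)      # transform branch
        t = tf.layers.dense(out, int(size), activation=tf.nn.sigmoid,
                            name='hw_t_%d' % i)      # transform gate
        out = t * h + (1. - t) * out                 # gated mix with the carry path
    return out
```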
Example #5
def sg_reverse_seq(tensor, opt):
    # default sequence dimension
    opt += tf.sg_opt(dim=1)
    seq_len = tf.not_equal(tensor,
                           tf.zeros_like(tensor)).sg_int().sg_sum(dims=opt.dim)
    return tf.reverse_sequence(tensor, seq_len, opt.dim, name=opt.name)
Example #6
def sg_lstm(tensor, opt):
    r"""Applies an LSTM.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.   
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.
        mask: Boolean 2-D `Tensor` or None (default).
            Values at false elements are excluded from the calculation,
            so the outputs at those locations become 0.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape [batch size, dim].
      Otherwise, [batch size, time steps, dim].
    """
    # layer normalization
    # noinspection PyPep8
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(hh, cc, x):
        # forget gate
        f = tf.sigmoid(ln(tf.matmul(x, w_f) + tf.matmul(hh, u_f) + (b_f if opt.bias else 0)))
        # input gate
        ii = tf.sigmoid(ln(tf.matmul(x, w_i) + tf.matmul(hh, u_i) + (b_i if opt.bias else 0)))
        # new cell value
        c_new = tf.tanh(ln(tf.matmul(x, w_c) + tf.matmul(hh, u_c) + (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(ln(tf.matmul(x, w_o) + tf.matmul(hh, u_o) + (b_o if opt.bias else 0)))
        # cell update
        cell = f * cc + ii * c_new
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = tf.sg_initializer.orthogonal('W_i', (opt.in_dim, opt.dim), summary=opt.summary)
    u_i = tf.sg_initializer.identity('U_i', opt.dim, summary=opt.summary)
    w_f = tf.sg_initializer.orthogonal('W_f', (opt.in_dim, opt.dim), summary=opt.summary)
    u_f = tf.sg_initializer.identity('U_f', opt.dim, summary=opt.summary)
    w_o = tf.sg_initializer.orthogonal('W_o', (opt.in_dim, opt.dim), summary=opt.summary)
    u_o = tf.sg_initializer.identity('U_o', opt.dim, summary=opt.summary)
    w_c = tf.sg_initializer.orthogonal('W_c', (opt.in_dim, opt.dim), summary=opt.summary)
    u_c = tf.sg_initializer.identity('U_c', opt.dim, summary=opt.summary)
    if opt.bias:
        b_i = tf.sg_initializer.constant('b_i', opt.dim, summary=opt.summary)
        b_f = tf.sg_initializer.constant('b_f', opt.dim, summary=opt.summary)
        b_o = tf.sg_initializer.constant('b_o', opt.dim, value=1, summary=opt.summary)
        b_c = tf.sg_initializer.constant('b_c', opt.dim, summary=opt.summary)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = tf.sg_initializer.constant('beta', opt.dim, summary=opt.summary)
        gamma = tf.sg_initializer.constant('gamma', opt.dim, value=1, summary=opt.summary)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(axis=1))

    # merge tensor
    out = tf.concat(out, 1)

    # apply mask
    if opt.mask is None:
        if opt.last_only:
            return out[:, -1, :]
        else:
            return out
    else:
        # apply mask
        out *= opt.mask.sg_expand_dims(axis=2).sg_float()

        if opt.last_only:
            # calc sequence length using given mask
            seq_len = opt.mask.sg_int().sg_sum(axis=1)
            # get last output
            rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
            return rev[:, 0, :]
        else:
            return out
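
The step() function above is a standard LSTM cell: forget, input, and output gates plus a tanh candidate, with the cell updated as c = f * c + i * c_new. Below is a hedged NumPy sketch of a single step with bias=True and ln=False, using toy shapes and random parameters purely for illustration (names mirror the variables above; note that it is b_o that is initialized to 1 in this implementation).

```
import numpy as np

def sigmoid(v):
    return 1. / (1. + np.exp(-v))

rng = np.random.RandomState(0)
in_dim, dim, batch = 3, 4, 2
W = {k: rng.randn(in_dim, dim) * 0.1 for k in 'ifoc'}   # orthogonal init in the real code
U = {k: np.eye(dim) for k in 'ifoc'}                     # identity init, as above
b = {k: np.zeros(dim) for k in 'ifoc'}
b['o'] += 1.                                             # b_o is created with value=1 above

x = rng.randn(batch, in_dim)
h = c = np.zeros((batch, dim))

f = sigmoid(x @ W['f'] + h @ U['f'] + b['f'])      # forget gate
i = sigmoid(x @ W['i'] + h @ U['i'] + b['i'])      # input gate
c_new = np.tanh(x @ W['c'] + h @ U['c'] + b['c'])  # candidate cell value
o = sigmoid(x @ W['o'] + h @ U['o'] + b['o'])      # output gate
c = f * c + i * c_new                              # cell update
h = o * np.tanh(c)                                 # hidden output
```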
Example #7
def sg_gru(tensor, opt):
    r"""Applies a GRU.
    
    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.   
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.
        mask: Boolean 2-D `Tensor` or None (default).
            Values at false elements are excluded from the calculation,
            so the outputs at those locations become 0.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape [batch size, dim].
      Otherwise, [batch size, time steps, dim].
    """

    # layer normalization
    # noinspection PyPep8
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(hh, x):
        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(hh, u_z) + (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(hh, u_r) + (b_r if opt.bias else 0)))
        # h_hat
        h_hat = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * hh, u_h) + (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h_hat + z * hh
        return y

    # parameter initialize
    w_z = tf.sg_initializer.orthogonal('W_z', (opt.in_dim, opt.dim), summary=opt.summary)
    u_z = tf.sg_initializer.identity('U_z', opt.dim, summary=opt.summary)
    w_r = tf.sg_initializer.orthogonal('W_r', (opt.in_dim, opt.dim), summary=opt.summary)
    u_r = tf.sg_initializer.identity('U_r', opt.dim, summary=opt.summary)
    w_h = tf.sg_initializer.orthogonal('W_h', (opt.in_dim, opt.dim), summary=opt.summary)
    u_h = tf.sg_initializer.identity('U_h', opt.dim, summary=opt.summary)
    if opt.bias:
        b_z = tf.sg_initializer.constant('b_z', opt.dim, summary=opt.summary)
        b_r = tf.sg_initializer.constant('b_r', opt.dim, summary=opt.summary)
        b_h = tf.sg_initializer.constant('b_h', opt.dim, summary=opt.summary)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = tf.sg_initializer.constant('beta', opt.dim, summary=opt.summary)
        gamma = tf.sg_initializer.constant('gamma', opt.dim, value=1, summary=opt.summary)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        # noinspection PyUnresolvedReferences
        out.append(h.sg_expand_dims(axis=1))

    # merge tensor
    out = tf.concat(out, 1)

    # apply mask
    if opt.mask is None:
        if opt.last_only:
            return out[:, -1, :]
        else:
            return out
    else:
        # apply mask
        out *= opt.mask.sg_expand_dims(axis=2).sg_float()

        if opt.last_only:
            # calc sequence length using given mask
            seq_len = opt.mask.sg_int().sg_sum(axis=1)
            # get last output
            rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
            return rev[:, 0, :]
        else:
            return out
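
Finally, both sg_lstm and sg_gru recover the output at the last valid (unpadded) time step with the same trick: reverse each row over its true length via tf.reverse_sequence, then take position 0. The NumPy sketch below shows the equivalent direct indexing on toy data, purely to illustrate why rev[:, 0, :] is the last real output.

```
import numpy as np

# toy masked outputs: row lengths 3 and 2, zero-padded to 4 steps
out = np.array([[[1.], [2.], [3.], [0.]],
                [[4.], [5.], [0.], [0.]]])
seq_len = np.array([3, 2])

# equivalent of tf.reverse_sequence(out, seq_len, seq_axis=1)[:, 0, :]
last = np.stack([out[b, seq_len[b] - 1] for b in range(out.shape[0])])
print(last)   # [[3.], [5.]]
```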