Code example #1
    def __init__(self,
                 in_dim,
                 dim,
                 forget_bias=1.0,
                 activation=tf.tanh,
                 ln=True,
                 bias=True,
                 dtype=tf.float32,
                 dev='/cpu:0',
                 batch_size=3):

        self._in_dim = in_dim
        self._dim = dim
        self._forget_bias = forget_bias
        self._activation = activation
        self._ln = ln
        self._bias = bias
        self._dev = dev
        self._size = self._in_dim * self._dim
        self._initializer = tf.contrib.layers.xavier_initializer(
        )  #tf.random_normal_initializer()
        self._dtype = dtype

        with tf.device(self._dev):
            with tf.variable_scope("lstm") as scp:
                #self.rnn_state = tf.get_variable("rnn_c",(batch_size, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)
                #self.rnn_h = tf.get_variable("rnn_h",(batch_size, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)
                self.rnn_state, self.rnn_h = tf.zeros(
                    (batch_size, self._dim), dtype=tf.sg_floatx), tf.zeros(
                        (batch_size, self._dim), dtype=tf.sg_floatx)
                w_i2h = tf.get_variable(
                    'w_i2h', (self._in_dim, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_h2h = tf.get_variable(
                    'w_h2h', (self._dim, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_b = tf.get_variable(
                    'w_b', (1, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True) if self._bias else 0.0
                if self._ln:
                    with tf.variable_scope("ln_rnn"):
                        beta = tf.get_variable(
                            'beta',
                            self._dim,
                            dtype=tf.sg_floatx,
                            initializer=tf.constant_initializer(0.0),
                            trainable=True)
                        gamma = tf.get_variable(
                            'gamma',
                            self._dim,
                            dtype=tf.sg_floatx,
                            initializer=tf.constant_initializer(1.0),
                            trainable=True)
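The `w_i2h` and `w_h2h` variables above pack the projections for all four LSTM gates into a single `(…, 4 * dim)` matrix, so one cell step needs only one input matmul and one hidden-state matmul. Below is a minimal NumPy sketch of that packing; every name and size is hypothetical, and the excerpt does not show in which order the four slices are consumed.

import numpy as np

batch, in_dim, dim = 3, 8, 16                # hypothetical sizes for illustration

x = np.random.randn(batch, in_dim)           # current input
h = np.random.randn(batch, dim)              # previous hidden state
w_i2h = np.random.randn(in_dim, 4 * dim)     # packed input-to-hidden weights, as w_i2h above
w_h2h = np.random.randn(dim, 4 * dim)        # packed hidden-to-hidden weights, as w_h2h above
w_b = np.zeros((1, 4 * dim))                 # packed bias, as w_b above

# one fused projection per step, then split into four per-gate pre-activations
gates = x @ w_i2h + h @ w_h2h + w_b          # (batch, 4 * dim)
g0, g1, g2, g3 = np.split(gates, 4, axis=1)  # each (batch, dim); gate order is an assumption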
Code example #2
def sg_emb(**kwargs):
    r"""Returns an embedding layer or a look-up table.
    
    Args:
      name: A name for the layer (required).
      emb: A 2-D array with the shape `[vocabulary size - 1, embedding dimension size]`.
        A zero row is prepended as the first row of the returned table because index 0 corresponds to padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive int32.
      
    Returns:
      A 2-D tensor.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb
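A minimal sketch of what the zero-prepended table means, written in plain TensorFlow 1.x with hypothetical names and sizes: row 0 of the returned table is a constant, so index 0 (padding) always looks up an all-zero vector and never receives gradient updates, while the remaining rows stay trainable.

import tensorflow as tf  # TensorFlow 1.x style, matching the examples on this page

voca_size, dim = 5, 3                                    # hypothetical sizes
w = tf.get_variable('w_emb', (voca_size - 1, dim))       # trainable rows for ids 1..voca_size-1
emb = tf.concat([tf.zeros((1, dim)), w], axis=0)         # constant zero row for id 0 (padding)
ids = tf.constant([0, 2, 4])
vectors = tf.nn.embedding_lookup(emb, ids)               # id 0 always yields the zero vector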
Code example #3
File: sg_layer.py  Project: jamcar23/sugartensor
def sg_emb(**kwargs):
    r"""Returns a look-up table for embedding.
    
    kwargs:
      name: A name for the layer.
      emb: A 2-D array (optional). 
        If None, the resulting tensor should have the shape of 
        `[vocabulary size, embedding dimension size]`.
        Note that its first row is filled with 0's associated with padding.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      voca_size: A positive integer. The size of vocabulary.
      
    Returns:
      A 2-D `Tensor` of float32.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = tf.sg_initializer.he_uniform(opt.name,
                                         (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = tf.sg_initializer.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb
Code example #4
def sg_rnn(tensor, opt):
    r"""Applies a simple rnn.
    
    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn: one step on the current input slice x
        y = ln(
            tf.matmul(x, w) + tf.matmul(h, u) +
            (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
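For reference, the recurrence this loop unrolls, written out in NumPy with hypothetical shapes (layer normalization and the sugartensor tensor helpers are omitted):

import numpy as np

batch, time_steps, in_dim, dim = 2, 4, 5, 3    # hypothetical sizes
x = np.random.randn(batch, time_steps, in_dim)
w = np.random.randn(in_dim, dim)               # input-to-hidden weights ('W' above)
u = np.eye(dim)                                # hidden-to-hidden weights, identity-initialized ('U' above)
b = np.zeros(dim)                              # bias ('b' above)

h = np.zeros((batch, dim))                     # zero initial state, as when init_state is None
outputs = []
for t in range(time_steps):
    h = x[:, t, :] @ w + h @ u + b             # one step; this layer adds no extra nonlinearity
    outputs.append(h[:, None, :])
out = np.concatenate(outputs, axis=1)          # (batch, time_steps, dim)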
Code example #5
def sg_rnn(tensor, opt):

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])

                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                         / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)

                # apply parameter
                return gamma * xx + beta
            # no layer normalization: pass the input through unchanged
            return xx

        # apply transform
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0), opt)

        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1]-1, :]

    return out
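The inner `ln` normalizes over the last axis with `tf.nn.moments` and then applies the learned `gamma`/`beta`. A NumPy sketch of the same computation, with a small `eps` standing in for `tf.sg_eps` and hypothetical shapes:

import numpy as np

def layer_norm(x, gamma, beta, eps=1e-8):
    # mean and variance over the final (feature) axis
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)    # normalize
    return gamma * x_hat + beta                # learned scale and offset

x = np.random.randn(2, 4)                      # (batch, dim)
y = layer_norm(x, gamma=np.ones(4), beta=np.zeros(4))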
Code example #6
def q_process(t1, t2):
    '''
    Processes each training sample so that it fits in the queue.
    '''
    # Lstrip zeros
    zeros = tf.equal(t1, tf.zeros_like(t1)).sg_int().sg_sum()
    t1 = t1[zeros:] 
    t2 = t2[zeros:]

    # zero pre-padding
    t1 = tf.concat([tf.zeros([Hyperparams.seqlen-1], tf.int32), t1], 0)# 49 zero-prepadding
    t2 = tf.concat([tf.zeros([Hyperparams.seqlen-1], tf.int32), t2], 0)# 49 zero-prepadding
    # random crop
    stacked = tf.stack((t1, t2))
    cropped = tf.random_crop(stacked, [2, Hyperparams.seqlen])
    t1, t2 = cropped[0], cropped[1]
    
    t2 = t2[-1]

    return t1, t2
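A NumPy walk-through of the same preprocessing on a toy pair, with a hypothetical `seqlen = 5` standing in for `Hyperparams.seqlen`: the (assumed leading) zeros are stripped, `seqlen - 1` zeros are prepended, a shared random window of length `seqlen` is cropped, and only the last element of `t2` is kept as the target.

import numpy as np

seqlen = 5                                           # hypothetical stand-in for Hyperparams.seqlen
t1 = np.array([0, 0, 3, 7, 1, 4], dtype=np.int32)
t2 = np.array([0, 0, 9, 2, 8, 6], dtype=np.int32)

# strip the zeros (assumed to all sit at the front, as in the code above)
n_zeros = int((t1 == 0).sum())
t1, t2 = t1[n_zeros:], t2[n_zeros:]

# zero pre-padding of length seqlen - 1
pad = np.zeros(seqlen - 1, dtype=np.int32)
t1, t2 = np.concatenate([pad, t1]), np.concatenate([pad, t2])

# random crop of length seqlen at an offset shared by both sequences
start = np.random.randint(0, len(t1) - seqlen + 1)
t1, t2 = t1[start:start + seqlen], t2[start:start + seqlen]

t2 = t2[-1]                                          # only the final element is the target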
Code example #7
File: sg_layer.py  Project: joastern/sugartensor
def sg_rnn(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn: one step on the current input slice x
        y = ln(
            tf.matmul(x, w) + tf.matmul(h, u) +
            (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Code example #8
File: sg_layer.py  Project: joastern/sugartensor
def sg_emb(**kwargs):
    opt = tf.sg_opt(kwargs)
    assert opt.name is not None, 'name is mandatory.'

    import sg_initializer as init

    if opt.emb is None:
        # initialize embedding matrix
        assert opt.voca_size is not None, 'voca_size is mandatory.'
        assert opt.dim is not None, 'dim is mandatory.'
        w = init.he_uniform(opt.name, (opt.voca_size - 1, opt.dim))
    else:
        # use given embedding matrix
        w = init.external(opt.name, value=opt.emb)

    # 1st row should be zero and not be updated by backprop because of zero padding.
    emb = tf.concat(0, [tf.zeros((1, opt.dim), dtype=tf.sg_floatx), w])

    return emb
Code example #9
def trainIt():
    data = prepareData()
    x = data['train'][0]
    # x = data['train']
    z = tf.random_normal((batch_size, rand_dim))
    gen = generator(z)
    disc_real = discriminator(x)
    disc_fake = discriminator(gen)
    loss_d_r = disc_real.sg_mse(target=data['train'][1], name='disc_real')
    # loss_d_r = disc_real.sg_mse(target = tf.ones(batch_size), name = 'disc_real')
    loss_d_f = disc_fake.sg_mse(target=tf.zeros(batch_size), name='disc_fake')
    loss_d = (loss_d_r + loss_d_f) / 2
    loss_g = disc_fake.sg_mse(target=tf.ones(batch_size), name='gen')
    # train_disc = tf.sg_optim(loss_d, lr=0.01, name = 'train_disc', category = 'discriminator')  # discriminator train ops
    train_disc = tf.sg_optim(loss_d_r,
                             lr=0.01,
                             name='train_disc',
                             category='discriminator')
    train_gen = tf.sg_optim(loss_g, lr=0.01,
                            category='generator')  # generator train ops

    @tf.sg_train_func
    def alt_train(sess, opt):
        if sess.run(tf.sg_global_step()) % 1 == 0:
            l_disc = sess.run([loss_d_r,
                               train_disc])[0]  # training discriminator
        else:
            l_disc = sess.run(loss_d)
        # l_gen = sess.run([loss_g, train_gen])[0]  # training generator
        # print np.mean(l_gen)
        return np.mean(l_disc)  #+ np.mean(l_gen)

    alt_train(log_interval=10,
              max_ep=25,
              ep_size=(1100 + 690) / batch_size,
              early_stop=False,
              save_dir='asset/train/gan',
              save_interval=10)
Code example #10
 def zero_state(self, batch_size):
     dtype = tf.float32
     return (tf.zeros((batch_size, self._seqlen, self._dim),
                      dtype=tf.sg_floatx),
             tf.zeros((batch_size, self._seqlen, self._dim),
                      dtype=tf.sg_floatx))
Code example #11
    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  #   (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]],
                          1)
        reuse_vars = time == tf.constant(0) or reu_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))


        # --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

        # quasi cnn layer ZFO  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=3,
                                   name="qconv_1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1,
                                axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

        #residual block
        causal = False  # for encoder
        crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal),
            ln=causal).sg_conv1d_gpus(name="dimred_0",
                                      size=1,
                                      dev="/cpu:0",
                                      reuse=reuse_vars,
                                      dim=Hp.hd / 2,
                                      act='relu',
                                      bn=(not causal),
                                      ln=causal))

        # conv LSTM
        with tf.variable_scope("mem/clstm") as scp:
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5,
                                             reuse_vars=reuse_vars)
        # dimension recover and residual connection
        rnn_input0 = pool[:Hp.batch_size, :, :] + crnn_h.sg_conv1d_gpus(
            name="diminc_0", size=1, dev="/cpu:0", dim=Hp.hd,
            reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

        # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

        # pooling for lstm input
        # quasi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True,
                                          size=2,
                                          name="qconv_3",
                                          reuse_vars=reuse_vars)
        pool = conv.sg_quasi_rnn(is_enc=True,
                                 att=False,
                                 name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

        # recurrent block
        with tf.variable_scope("mem/lstm") as scp:
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)),
                         [1, Hp.maxlen, 1])

        # --------------------------   BYTENET DECODER   --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
                crnn_h, losses)
Code example #12
    def __init__(self, mode="train"):
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data(
            )  # (16, 150) int32, (16, 150) int32, int
            self.y_src = tf.concat(
                axis=1,
                values=[tf.zeros((Hp.bs, 1), tf.int32),
                        self.y[:, :-1]])  # (16, 150) int32
        else:  # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))

        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Embedding
        self.emb_x = tf.sg_emb(name='emb_x',
                               voca_size=len(self.char2idx),
                               dim=Hp.hd)  # (179, 320)
        self.emb_y = tf.sg_emb(name='emb_y',
                               voca_size=len(self.char2idx),
                               dim=Hp.hd)  # (179, 320)
        self.X = self.x.sg_lookup(emb=self.emb_x)  # (16, 150, 320)
        self.Y = self.y_src.sg_lookup(emb=self.emb_y)  # (16, 150, 320)

        # Encoding
        self.conv = self.X.sg_quasi_conv1d(is_enc=True,
                                           size=6)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo1 = self.pool[Hp.bs:]  # (16*3, 15, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo2 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo3 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H4 = self.pool[:Hp.bs]
        self.H_zfo4 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        # Decoding
        self.dec = self.Y.sg_concat(target=self.H_zfo1, dim=0)

        self.d_conv = self.dec.sg_quasi_conv1d(is_enc=False, size=2)
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo2, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo3, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo4, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        self.concat = self.H4.sg_concat(target=self.d_conv, dim=0)
        self.d_pool = self.concat.sg_quasi_rnn(is_enc=False,
                                               att=True)  # (16*4, 150, 320)

        self.logits = self.d_pool.sg_conv1d(size=1,
                                            dim=len(self.char2idx),
                                            act="linear")  # (16, 150, 179)
        self.preds = self.logits.sg_argmax()
        if mode == 'train':
            # cross entropy loss with logits ( for training set )
            self.loss = self.logits.sg_ce(target=self.y, mask=True)
            self.istarget = tf.not_equal(self.y, 0).sg_float()
            self.reduced_loss = (self.loss.sg_sum()) / (
                self.istarget.sg_sum() + 0.00001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
Code example #13
def tower_loss2_old(xx, scope, reuse_vars=False):

    # make embedding matrix for source and target
    with tf.variable_scope('embs', reuse=reuse_vars):
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)

    x_sents = tf.unstack(xx, axis=1)  #each element is (batch, sentlen)

    # generate first an unconditioned sentence
    n_input = Hp.hd

    subrec1 = subrec_zero_state(Hp.bs, Hp.hd)
    subrec2 = subrec_zero_state(Hp.bs, Hp.hd)

    rnn_cell = LSTMCell(in_dim=n_input, dim=Hp.hd)
    (rnn_state, rnn_h) = rnn_cell.zero_state(Hp.bs)

    crnn_cell = ConvLSTMCell(in_dim=n_input, dim=Hp.hd)
    (crnn_state, crnn_h) = crnn_cell.zero_state(n_input)

    for sent in range(len(x_sents) - 1):
        y = x_sents[sent + 1]
        x = x_sents[sent]  #   (batch, sentlen) = (16, 200)
        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.bs, 1), tf.sg_intx), y[:, :-1]], 1)

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, dim1)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))

        #quasi rnn layer  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=2,
                                   name="conv1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        concat = subrec1.sg_concat(target=conv, dim=0)
        subrec1 = conv.sg_quasi_rnn(is_enc=True, att=True)

        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="conv2",
                                    reuse_vars=reuse_vars)
        concat = subrec2.sg_concat(target=conv, dim=0)
        subrec2 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # conv LSTM
        (crnn_state, crnn_h) = crnn_cell(subrec2, (crnn_state, crnn_h), 5)

        # recurrent block
        (rnn_state, rnn_h) = rnn_cell(crnn_h, (rnn_state, rnn_h))

        # CNN decoder
        dec = crnn_h.sg_concat(target=y_src.sg_lookup(emb=emb_y), name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)
    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    return total_loss
Code example #14
def sg_gru(tensor, opt):

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])

                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                         / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)

                # apply parameter
                return gamma * xx + beta
            # no layer normalization: pass the input through unchanged
            return xx

        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0), opt))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0), opt))
        # h_hat (candidate state)
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r*h, u_h) + (b_h if opt.bias else 0), opt))
        # final output
        y = (1. - z) * h + z * hh

        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1]-1, :]

    return out
Code example #15
File: train.py  Project: yooynas/ByteNet
# inputs
#

# ComTrans parallel corpus input tensor ( with QueueRunner )
data = ComTrans(batch_size=batch_size)

# source, target sentence
x, y = data.source, data.target
voca_size = data.voca_size

# make embedding matrix for source and target
emb_x = tf.sg_emb(name='emb_x', voca_size=voca_size, dim=latent_dim)
emb_y = tf.sg_emb(name='emb_y', voca_size=voca_size, dim=latent_dim)

# shift target for training source
y_src = tf.concat(1, [tf.zeros((batch_size, 1), tf.sg_intx), y[:, :-1]])


# residual block
@tf.sg_sugar_func
def sg_res_block(tensor, opt):
    # default rate
    opt += tf.sg_opt(size=3, rate=1, causal=False)

    # input dimension
    in_dim = tensor.get_shape().as_list()[-1]

    # reduce dimension
    input_ = (tensor.sg_bypass(act='relu', bn=(not opt.causal),
                               ln=opt.causal).sg_conv1d(size=1,
                                                        dim=in_dim / 2,
Code example #16
def sg_gru(tensor, opt):
    r"""Applies a GRU.
    
    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.   
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.
        mask: Boolean 2-D `Tensor` or None (default).
            Values at False elements are excluded from the calculation,
            so the outputs at those locations become 0.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape [batch size, dim].
      Otherwise, [batch size, time steps, dim].
    """

    # layer normalization
    # noinspection PyPep8
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(hh, x):
        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(hh, u_z) + (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(hh, u_r) + (b_r if opt.bias else 0)))
        # h_hat
        h_hat = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * hh, u_h) + (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h_hat + z * hh
        return y

    # parameter initialize
    w_z = tf.sg_initializer.orthogonal('W_z', (opt.in_dim, opt.dim), summary=opt.summary)
    u_z = tf.sg_initializer.identity('U_z', opt.dim, summary=opt.summary)
    w_r = tf.sg_initializer.orthogonal('W_r', (opt.in_dim, opt.dim), summary=opt.summary)
    u_r = tf.sg_initializer.identity('U_r', opt.dim, summary=opt.summary)
    w_h = tf.sg_initializer.orthogonal('W_h', (opt.in_dim, opt.dim), summary=opt.summary)
    u_h = tf.sg_initializer.identity('U_h', opt.dim, summary=opt.summary)
    if opt.bias:
        b_z = tf.sg_initializer.constant('b_z', opt.dim, summary=opt.summary)
        b_r = tf.sg_initializer.constant('b_r', opt.dim, summary=opt.summary)
        b_h = tf.sg_initializer.constant('b_h', opt.dim, summary=opt.summary)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = tf.sg_initializer.constant('beta', opt.dim, summary=opt.summary)
        gamma = tf.sg_initializer.constant('gamma', opt.dim, value=1, summary=opt.summary)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        # noinspection PyUnresolvedReferences
        out.append(h.sg_expand_dims(axis=1))

    # merge tensor
    out = tf.concat(out, 1)

    # apply mask
    if opt.mask is None:
        if opt.last_only:
            return out[:, -1, :]
        else:
            return out
    else:
        # apply mask
        out *= opt.mask.sg_expand_dims(axis=2).sg_float()

        if opt.last_only:
            # calc sequence length using given mask
            seq_len = opt.mask.sg_int().sg_sum(axis=1)
            # get last output
            rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
            return rev[:, 0, :]
        else:
            return out
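The gate equations that `step` implements, written out in NumPy for one time step with hypothetical shapes (layer normalization and masking omitted; the output mixes the previous state and the candidate exactly as the code above does):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def gru_step(h, x, p):
    z = sigmoid(x @ p['w_z'] + h @ p['u_z'] + p['b_z'])             # update gate
    r = sigmoid(x @ p['w_r'] + h @ p['u_r'] + p['b_r'])             # reset gate
    h_hat = np.tanh(x @ p['w_h'] + (r * h) @ p['u_h'] + p['b_h'])   # candidate state
    return (1.0 - z) * h_hat + z * h                                 # same mixing as y above

in_dim, dim, batch = 4, 3, 2                                         # hypothetical sizes
p = {k: np.random.randn(in_dim, dim) for k in ('w_z', 'w_r', 'w_h')}
p.update({k: np.eye(dim) for k in ('u_z', 'u_r', 'u_h')})            # identity init, as above
p.update({k: np.zeros(dim) for k in ('b_z', 'b_r', 'b_h')})
h = np.zeros((batch, dim))
x = np.random.randn(batch, in_dim)
h = gru_step(h, x, p)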
Code example #17
File: sg_layer.py  Project: joastern/sugartensor
def sg_lstm(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(
            ln(
                tf.matmul(x, w_f) + tf.matmul(h, u_f) +
                (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(
            ln(
                tf.matmul(x, w_i) + tf.matmul(h, u_i) +
                (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(
            ln(
                tf.matmul(x, w_c) + tf.matmul(h, u_c) +
                (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(
            ln(
                tf.matmul(x, w_o) + tf.matmul(h, u_o) +
                (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Code example #18
 def zero_state(self, batch_size):
     dtype = tf.float32
     state_size = self.state_size
     return (tf.zeros((batch_size, state_size), dtype=tf.sg_floatx),
             tf.zeros((batch_size, state_size), dtype=tf.sg_floatx))
Code example #19
# hyper parameters
#

batch_size = 16  # batch size

#
# inputs
#

# ComTrans parallel corpus input tensor ( with QueueRunner )
data = ComTrans(batch_size=batch_size)

# source, target sentence
x, y = data.source, data.target
# shift target for training source
y_in = tf.concat([tf.zeros((batch_size, 1), tf.sg_intx), y[:, :-1]], axis=1)
# vocabulary size
voca_size = data.voca_size

# make embedding matrix for source and target
emb_x = tf.sg_emb(name='emb_x', voca_size=voca_size, dim=latent_dim)
emb_y = tf.sg_emb(name='emb_y', voca_size=voca_size, dim=latent_dim)

# latent from embed table
z_x = x.sg_lookup(emb=emb_x)
z_y = y_in.sg_lookup(emb=emb_y)

# encode graph ( atrous convolution )
enc = encode(z_x)

# concat merge target source
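The `y_in` shift above is the standard teacher-forcing setup: the decoder input at step t is the gold target token from step t-1, with a zero (padding) token in front. A toy NumPy illustration of the same shift:

import numpy as np

batch_size = 2
y = np.array([[5, 9, 3, 7],                    # gold target ids (toy values)
              [4, 1, 8, 2]], dtype=np.int32)

# prepend a zero column and drop the last column, mirroring the tf.concat above
y_in = np.concatenate([np.zeros((batch_size, 1), np.int32), y[:, :-1]], axis=1)
# y_in == [[0 5 9 3]
#          [0 4 1 8]]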
Code example #20
    def __init__(self, mode="train"):
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data() # (16, 150) int32, (16, 150) int32, int
            self.y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]], 1) # (16, 150) int32
        else: # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
        
        # Load vocabulary    
        char2idx, idx2char = load_vocab()
        
        # Embedding
        def embed(inputs, vocab_size, embed_size, variable_scope):
            '''
            inputs = tf.expand_dims(tf.range(5), 0) => (1, 5)
            embed(inputs, 5, 10) => (1, 5, 10)
            '''
            with tf.variable_scope(variable_scope):
                lookup_table = tf.get_variable('lookup_table', 
                                               dtype=tf.float32, 
                                               shape=[vocab_size, embed_size],
                                               initializer=tf.truncated_normal_initializer())
            return tf.nn.embedding_lookup(lookup_table, inputs)
        
        X = embed(self.x, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='X')  # (16, 150, 320)
        Y = embed(self.y_src, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='Y')  # (16, 150, 320)
#         Y = tf.concat((tf.zeros_like(Y[:, :1, :]), Y[:, :-1, :]), 1)
            
        # Encoding
        conv = X.sg_quasi_conv1d(is_enc=True, size=6) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo1 = pool[Hp.batch_size:] # (16*3, 15, 320) for decoding
         
        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo2 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding
         
        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo3 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding
         
        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H4 = pool[:Hp.batch_size] # (16, 150, 320) for decoding
        H_zfo4 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding

        # Decoding
        d_conv = (Y.sg_concat(target=H_zfo1, axis=0)
                   .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)
        
        d_conv = (d_pool.sg_concat(target=H_zfo2, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)
        
        d_conv = (d_pool.sg_concat(target=H_zfo3, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)
        
        d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        concat = H4.sg_concat(target=d_conv, axis=0)
        d_pool = concat.sg_quasi_rnn(is_enc=False, att=True) # (16, 150, 320)
        
        logits = d_pool.sg_conv1d(size=1, dim=len(char2idx), act="linear") # (16, 150, 179)

        if mode=='train':
            # cross entropy loss with logits ( for training set )
            self.loss = logits.sg_ce(target=self.y, mask=True)
            istarget = tf.not_equal(self.y, 0).sg_float()
            self.reduced_loss = (self.loss.sg_sum()) / (istarget.sg_sum() + 1e-8)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
        else: # inference
            self.preds = logits.sg_argmax() 
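The `reduced_loss` above averages the cross entropy only over non-padding positions; with `mask=True`, `sg_ce` contributes zero loss at padded targets, which is why dividing by the number of real tokens gives a proper mean. A NumPy sketch of that masked average with toy values:

import numpy as np

y = np.array([[3, 7, 0, 0],                    # target ids; 0 marks padding
              [5, 2, 9, 0]])
loss = np.array([[0.4, 0.9, 0.0, 0.0],         # per-token cross entropy, already zeroed at padding
                 [0.3, 0.6, 0.8, 0.0]])

istarget = (y != 0).astype(np.float32)               # 1.0 at real tokens, 0.0 at padding
reduced_loss = loss.sum() / (istarget.sum() + 1e-8)  # mean over the 5 real tokens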
Code example #21
def sg_lstm(tensor, opt):
    r"""Applies an LSTM.

    Args:
      tensor: A 3-D `Tensor` (automatically passed by decorator).
      opt:
        in_dim: A positive `integer`. The size of input dimension.
        dim: A positive `integer`. The size of output dimension.
        bias: Boolean. If True, biases are added.
        ln: Boolean. If True, layer normalization is applied.   
        init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
        last_only: Boolean. If True, the outputs in the last time step are returned.
        mask: Boolean 2-D `Tensor` or None (default).
            Values at False elements are excluded from the calculation,
            so the outputs at those locations become 0.
        summary: If True, summaries are added. The default is True.

    Returns:
      A `Tensor`. If last_only is True, the output tensor has shape [batch size, dim].
      Otherwise, [batch size, time steps, dim].
    """
    # layer normalization
    # noinspection PyPep8
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(hh, cc, x):
        # forget gate
        f = tf.sigmoid(ln(tf.matmul(x, w_f) + tf.matmul(hh, u_f) + (b_f if opt.bias else 0)))
        # input gate
        ii = tf.sigmoid(ln(tf.matmul(x, w_i) + tf.matmul(hh, u_i) + (b_i if opt.bias else 0)))
        # new cell value
        c_new = tf.tanh(ln(tf.matmul(x, w_c) + tf.matmul(hh, u_c) + (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(ln(tf.matmul(x, w_o) + tf.matmul(hh, u_o) + (b_o if opt.bias else 0)))
        # cell update
        cell = f * cc + ii * c_new
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = tf.sg_initializer.orthogonal('W_i', (opt.in_dim, opt.dim), summary=opt.summary)
    u_i = tf.sg_initializer.identity('U_i', opt.dim, summary=opt.summary)
    w_f = tf.sg_initializer.orthogonal('W_f', (opt.in_dim, opt.dim), summary=opt.summary)
    u_f = tf.sg_initializer.identity('U_f', opt.dim, summary=opt.summary)
    w_o = tf.sg_initializer.orthogonal('W_o', (opt.in_dim, opt.dim), summary=opt.summary)
    u_o = tf.sg_initializer.identity('U_o', opt.dim, summary=opt.summary)
    w_c = tf.sg_initializer.orthogonal('W_c', (opt.in_dim, opt.dim), summary=opt.summary)
    u_c = tf.sg_initializer.identity('U_c', opt.dim, summary=opt.summary)
    if opt.bias:
        b_i = tf.sg_initializer.constant('b_i', opt.dim, summary=opt.summary)
        b_f = tf.sg_initializer.constant('b_f', opt.dim, summary=opt.summary)
        b_o = tf.sg_initializer.constant('b_o', opt.dim, value=1, summary=opt.summary)
        b_c = tf.sg_initializer.constant('b_c', opt.dim, summary=opt.summary)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = tf.sg_initializer.constant('beta', opt.dim, summary=opt.summary)
        gamma = tf.sg_initializer.constant('gamma', opt.dim, value=1, summary=opt.summary)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(axis=1))

    # merge tensor
    out = tf.concat(out, 1)

    # apply mask
    if opt.mask is None:
        if opt.last_only:
            return out[:, -1, :]
        else:
            return out
    else:
        # apply mask
        out *= opt.mask.sg_expand_dims(axis=2).sg_float()

        if opt.last_only:
            # calc sequence length using given mask
            seq_len = opt.mask.sg_int().sg_sum(axis=1)
            # get last output
            rev = tf.reverse_sequence(out, seq_len, seq_axis=1)
            return rev[:, 0, :]
        else:
            return out
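For reference, one LSTM time step as computed by `step` above, in NumPy with hypothetical shapes (layer normalization and masking omitted):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def lstm_step(h, c, x, p):
    f = sigmoid(x @ p['w_f'] + h @ p['u_f'] + p['b_f'])       # forget gate
    i = sigmoid(x @ p['w_i'] + h @ p['u_i'] + p['b_i'])       # input gate
    c_new = np.tanh(x @ p['w_c'] + h @ p['u_c'] + p['b_c'])   # candidate cell value
    o = sigmoid(x @ p['w_o'] + h @ p['u_o'] + p['b_o'])       # output gate
    cell = f * c + i * c_new                                   # cell update
    return o * np.tanh(cell), cell                             # new hidden state, new cell

in_dim, dim, batch = 4, 3, 2                                   # hypothetical sizes
p = {('w_' + g): np.random.randn(in_dim, dim) for g in 'fico'}
p.update({('u_' + g): np.eye(dim) for g in 'fico'})            # identity init, as above
p.update({('b_' + g): np.zeros(dim) for g in 'fico'})
h = c = np.zeros((batch, dim))
x = np.random.randn(batch, in_dim)
h, c = lstm_step(h, c, x, p)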
Code example #22
def sg_gru(tensor, opt):
    r"""Applies a GRU.
    
    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(
            ln(
                tf.matmul(x, w_z) + tf.matmul(h, u_z) +
                (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(
            ln(
                tf.matmul(x, w_r) + tf.matmul(h, u_r) +
                (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(
            ln(
                tf.matmul(x, w_h) + tf.matmul(r * h, u_h) +
                (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Code example #23
def sg_lstm(tensor, opt):
    r"""Applies an LSTM.

    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(
            ln(
                tf.matmul(x, w_f) + tf.matmul(h, u_f) +
                (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(
            ln(
                tf.matmul(x, w_i) + tf.matmul(h, u_i) +
                (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(
            ln(
                tf.matmul(x, w_c) + tf.matmul(h, u_c) +
                (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(
            ln(
                tf.matmul(x, w_o) + tf.matmul(h, u_o) +
                (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Code example #24
File: train.py  Project: liean/quasi-rnn
    def __init__(self, mode="train"):
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data(
            )  # (16, 150) int32, (16, 150) int32, int
            self.y_src = tf.concat(
                [tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]],
                1)  # (16, 150) int32
        else:  # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32,
                                        shape=(Hp.batch_size, Hp.maxlen))

        # Load vocabulary
        char2idx, idx2char = load_vocab()

        # Embedding
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=len(char2idx),
                          dim=Hp.hidden_units)  # (179, 320)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=len(char2idx),
                          dim=Hp.hidden_units)  # (179, 320)
        X = self.x.sg_lookup(emb=emb_x)  # (16, 150, 320)
        Y = self.y_src.sg_lookup(emb=emb_y)  # (16, 150, 320)

        # Encoding
        conv = X.sg_quasi_conv1d(is_enc=True, size=6)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo1 = pool[Hp.batch_size:]  # (16*3, 15, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo2 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo3 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H4 = pool[:Hp.batch_size]  # (16, 150, 320) for decoding
        H_zfo4 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        # Decoding
        d_conv = (Y.sg_concat(target=H_zfo1,
                              axis=0).sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo2,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo3,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo4,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        concat = H4.sg_concat(target=d_conv, axis=0)
        d_pool = concat.sg_quasi_rnn(is_enc=False, att=True)  # (16, 150, 320)

        logits = d_pool.sg_conv1d(size=1, dim=len(char2idx),
                                  act="linear")  # (16, 150, 179)

        if mode == 'train':
            # cross entropy loss with logits ( for training set )
            loss = logits.sg_ce(target=self.y, mask=True)
            istarget = tf.not_equal(self.y, 0).sg_float()
            self.reduced_loss = (loss.sg_sum()) / (istarget.sg_sum() + 0.00001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
        else:  # inference
            self.preds = logits.sg_argmax()
Code example #25

#
# inputs
#

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist(batch_size=batch_size)

# input images and label
x = data.train.image
y = data.train.label

# labels for discriminator
y_real = tf.ones(batch_size)
y_fake = tf.zeros(batch_size)

# discriminator labels ( half 1s, half 0s )
y_disc = tf.concat(0, [y, y * 0])

# categorical latent variable
z_cat = tf.multinomial(
    tf.ones((batch_size, cat_dim), dtype=tf.sg_floatx) / cat_dim,
    1).sg_squeeze().sg_int()
# continuous latent variable
z_con = tf.random_normal((batch_size, con_dim))
# random latent variable dimension
z_rand = tf.random_normal((batch_size, rand_dim))
# latent variable
z = tf.concat(1, [z_cat.sg_one_hot(depth=cat_dim), z_con, z_rand])
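A NumPy sketch of how the latent vector `z` above is assembled: a one-hot categorical sample, a continuous code, and unstructured noise, concatenated along the feature axis (all dimensions here are hypothetical):

import numpy as np

batch_size, cat_dim, con_dim, rand_dim = 4, 10, 2, 30     # hypothetical sizes

z_cat = np.random.randint(0, cat_dim, size=batch_size)    # uniform categorical sample per example
z_cat_onehot = np.eye(cat_dim)[z_cat]                     # (batch, cat_dim) one-hot encoding
z_con = np.random.randn(batch_size, con_dim)              # continuous latent code
z_rand = np.random.randn(batch_size, rand_dim)            # unstructured noise

z = np.concatenate([z_cat_onehot, z_con, z_rand], axis=1) # (batch, cat_dim + con_dim + rand_dim)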
Code example #26
File: sg_layer.py  Project: joastern/sugartensor
def sg_gru(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(
            ln(
                tf.matmul(x, w_z) + tf.matmul(h, u_z) +
                (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(
            ln(
                tf.matmul(x, w_r) + tf.matmul(h, u_r) +
                (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(
            ln(
                tf.matmul(x, w_h) + tf.matmul(r * h, u_h) +
                (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out