Example #1
File: train.py Project: liean/quasi-rnn
def sg_quasi_rnn(tensor, opt):
    # Split
    if opt.att:
        H, Z, F, O = tf.split(tensor, 4, axis=0)  # (16, 150, 320) for all
    else:
        Z, F, O = tf.split(tensor, 3, axis=0)  # (16, 150, 320) for all

    # step func
    def step(z, f, o, c):
        '''
        Runs fo-pooling at each time step
        '''
        c = f * c + (1 - f) * z

        if opt.att:  # attention
            a = tf.nn.softmax(tf.einsum("ijk,ik->ij", H,
                                        c))  # alpha. (16, 150)
            k = (a.sg_expand_dims() * H).sg_sum(
                axis=1)  # attentional sum. (16, 320)
            h = o * (k.sg_dense(act="linear") +
                     c.sg_dense(act="linear"))
        else:
            h = o * c

        return h, c  # hidden states, (new) cell memories

    # Do rnn loop
    c, hs = 0, []
    timesteps = tensor.get_shape().as_list()[1]
    for t in range(timesteps):
        z = Z[:, t, :]  # (16, 320)
        f = F[:, t, :]  # (16, 320)
        o = O[:, t, :]  # (16, 320)

        # apply step function
        h, c = step(z, f, o, c)  # (16, 320), (16, 320)

        # save result
        hs.append(h.sg_expand_dims(axis=1))

    # Concat to return
    H = tf.concat(hs, 1)  # (16, 150, 320)
    seqlen = tf.to_int32(
        tf.reduce_sum(tf.sign(tf.abs(tf.reduce_sum(H, axis=-1))),
                      1))  # (16,) int32: non-zero time steps per example
    h = tf.reverse_sequence(input=H, seq_lengths=seqlen,
                            seq_dim=1)[:, 0, :]  # last hidden state vector

    if opt.is_enc:
        H_z = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        H_f = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        H_o = tf.tile((h.sg_dense(act="linear").sg_expand_dims(axis=1)),
                      [1, timesteps, 1])
        concatenated = tf.concat([H, H_z, H_f, H_o], 0)  # (16*4, 150, 320)
        return concatenated
    else:
        return H  # (16, 150, 320)
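
The loop above implements fo-pooling, c_t = f_t * c_{t-1} + (1 - f_t) * z_t with h_t = o_t * c_t. A minimal NumPy sketch of the same recurrence, with all shapes and gate values made up for illustration:

import numpy as np

# toy shapes: batch 2, 5 time steps, hidden size 3
b, seqlen, hd = 2, 5, 3
Z = np.random.rand(b, seqlen, hd)                      # candidate values
F = 1 / (1 + np.exp(-np.random.randn(b, seqlen, hd)))  # forget gates in (0, 1)
O = 1 / (1 + np.exp(-np.random.randn(b, seqlen, hd)))  # output gates in (0, 1)

c, hs = 0, []
for t in range(seqlen):
    # fo-pooling: blend previous cell memory with the new candidate
    c = F[:, t, :] * c + (1 - F[:, t, :]) * Z[:, t, :]
    hs.append(O[:, t, :] * c)                          # hidden state at step t

H = np.stack(hs, axis=1)
print(H.shape)  # (2, 5, 3)
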
Example #2
def sg_quasi_rnn(tensor, opt):
    # Split
    if opt.att:
        H, Z, F, O = tf.split(axis=0, num_or_size_splits=4,
                              value=tensor)  # (16, 150, 320) for all
    else:
        Z, F, O = tf.split(axis=0, num_or_size_splits=3,
                           value=tensor)  # (16, 150, 320) for all

    # step func
    def step(z, f, o, c):
        '''
        Runs fo-pooling at each time step
        '''
        c = f * c + (1 - f) * z

        if opt.att:  # attention
            a = tf.nn.softmax(tf.einsum("ijk,ik->ij", H,
                                        c))  # alpha. (16, 150)
            k = (a.sg_expand_dims() * H).sg_sum(
                dims=1)  # attentional sum. (16, 320)
            h = o * (k.sg_dense(act="linear") + c.sg_dense(act="linear"))
        else:
            h = o * c

        return h, c  # hidden states, (new) cell memories

    # Do rnn loop
    c, hs = 0, []
    timesteps = tensor.get_shape().as_list()[1]
    for t in range(timesteps):
        z = Z[:, t, :]  # (16, 320)
        f = F[:, t, :]  # (16, 320)
        o = O[:, t, :]  # (16, 320)

        # apply step function
        h, c = step(z, f, o, c)  # (16, 320), (16, 320)

        # save result
        hs.append(h.sg_expand_dims(dim=1))

    # Concat to return
    H = tf.concat(axis=1, values=hs)  # (16, 150, 320)
    if opt.is_enc:
        H_z = tf.tile((h.sg_dense(act="linear").sg_expand_dims(dim=1)),
                      [1, timesteps, 1])
        H_f = tf.tile((h.sg_dense(act="linear").sg_expand_dims(dim=1)),
                      [1, timesteps, 1])
        H_o = tf.tile((h.sg_dense(act="linear").sg_expand_dims(dim=1)),
                      [1, timesteps, 1])

        concatenated = tf.concat(axis=0, values=[H, H_z, H_f,
                                                 H_o])  # (16*4, 150, 320)
        return concatenated
    else:
        return H  # (16, 150, 320)
Example #3
    def __call__(self, tensor, state, scope=None):
        (prev_c, prev_h) = state
        # i = input_gate, c = new cell value for update, f = forget_gate, o = output_gate
        lstm_matrix = self._linear([tensor, prev_h])
        i, c, f, o = tf.split(value=lstm_matrix, num_or_size_splits=4, axis=1)
        if self._ln:
            with tf.variable_scope("ln_rnn", reuse=True):
                beta = tf.get_variable('beta')
                gamma = tf.get_variable('gamma')

        ln = lambda v: _ln_rnn(v, gamma, beta) if self._ln else v

        # LSTM cell update for a single time step
        new_c = prev_c * tf.sigmoid(ln(f)) + tf.sigmoid(
            ln(i)) * self._activation(ln(c))
        new_h = self._activation(new_c) * tf.sigmoid(ln(o))

        return (new_c, new_h)
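
The cell above relies on an external `_ln_rnn(v, gamma, beta)` helper that the listing does not show. A common layer-normalization implementation for RNN pre-activations looks like this sketch (the epsilon value is an assumption):

import tensorflow as tf

def _ln_rnn(x, gamma, beta, eps=1e-5):
    # layer norm: normalize each row over its feature axis, then scale and shift
    mean, variance = tf.nn.moments(x, axes=[1], keep_dims=True)
    return gamma * (x - mean) / tf.sqrt(variance + eps) + beta
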
Example #4
#
# hyper parameters
#

batch_size = 16  # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence lengths, excluding zero-padding
seq_len = []
for input_ in inputs:
    seq_len.append(
        tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    # encode audio feature
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
Example #5
def generate():
    dev = '/cpu:0'
    with tf.device(dev):
        mydir = 'tfrc150char_wrd0704'
        files = [f for f in listdir(mydir) if isfile(join(mydir, f))]
        tfrecords_filename = [join(mydir, 'short_infer3.tfrecords')]  #[join(mydir, f) for f in tfrecords_filename]
        tfrecords_filename_inf = [join(mydir, '11_3.tfrecords')]

        print(tfrecords_filename)
        filename_queue = tf.train.string_input_producer(tfrecords_filename,
                                                        num_epochs=num_epochs,
                                                        shuffle=True,
                                                        capacity=1)
        infer_queue = tf.train.string_input_producer(tfrecords_filename_inf,
                                                     num_epochs=num_epochs,
                                                     shuffle=True,
                                                     capacity=1)

        optim = tf.train.AdamOptimizer(learning_rate=0.0001,
                                       beta1=0.9,
                                       beta2=0.99)

        # Calculate the gradients for each model tower.
        tower_grads = []
        reuse_vars = False
        with tf.variable_scope("dec_lstm") as scp:
            dec_cell = BasicLSTMCell2(Hp.w_emb_size,
                                      Hp.rnn_hd,
                                      state_is_tuple=True)

        with tf.variable_scope("contx_lstm") as scp:
            cell = BasicLSTMCell2(Hp.hd, Hp.rnn_hd, state_is_tuple=True)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                input_keep_prob=Hp.keep_prob,
                output_keep_prob=Hp.keep_prob)

        (words, chars) = read_and_decode(filename_queue,
                                         Hp.batch_size * Hp.num_gpus)

        words_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=words)
        chars_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=chars)

        word_emb = np.loadtxt("glove300d_0704.txt")
        Hp.word_vs = word_emb.shape[0]

        # --------------------------------------------------------------------------------
        with tf.name_scope('%s_%d' % ("tower", 0)) as scope:
            rnn_state = tower_infer_enc(chars_splits[0],
                                        scope,
                                        rnn_cell,
                                        dec_cell,
                                        word_emb,
                                        out_reuse_vars=False,
                                        dev='/cpu:0')

            chars_pl = tf.placeholder(tf.int32, shape=(None, Hp.c_maxlen))
            rnn_state_pl1 = [
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd)),
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd))
            ]
            rnn_state_pl = tf.contrib.rnn.LSTMStateTuple(
                rnn_state_pl1[0], rnn_state_pl1[1])

            final_ids, rnn_state_dec = tower_infer_dec(chars_pl,
                                                       scope,
                                                       rnn_cell,
                                                       dec_cell,
                                                       word_emb,
                                                       rnn_state_pl,
                                                       out_reuse_vars=False,
                                                       dev='/cpu:0')

        # --------------------------------------------------------------------------------

        saver = tf.train.Saver(tf.trainable_variables())
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.94

        session_config.gpu_options.allow_growth = False

        restore_dir = 'tnsrbrd/hin17d08m_1313g2'  #   lec30d07m_1634g2   lec04d07m_2006g2     lec28d07m_1221g2    lec31d07m_1548g2
        csv_file = join(restore_dir, time.strftime("hin%dd%mm_%H%M.csv"))
        csv_f = open(csv_file, 'a')
        csv_writer = csv.writer(csv_f)

        with tf.Session(config=session_config) as sess:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            saver.restore(sess,
                          tf.train.latest_checkpoint(
                              join(restore_dir,
                                   'last_chpt')))  #    lec04d07m_2006g2

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for ep in range(num_epochs):

                tf.sg_set_infer(sess)
                rnn_state_val, w_txt, ch_txt = sess.run(
                    [rnn_state, words_splits[0], chars_splits[0]],
                    feed_dict={Hp.keep_prob: 1.0})

                predictions = []  #[w_txt[:,2,:]]
                for idx in range(3):
                    char_inpt = word2char_ids(
                        ids_val) if idx != 0 else ch_txt[:, 2, :]
                    ids_val, rnn_state_val = sess.run(
                        [final_ids, rnn_state_dec],
                        feed_dict={
                            Hp.keep_prob: 1.0,
                            rnn_state_pl1[0]: rnn_state_val[0],
                            rnn_state_pl1[1]: rnn_state_val[1],
                            chars_pl: char_inpt
                        })
                    temp = np.zeros((Hp.batch_size, Hp.w_maxlen))
                    for b in range(Hp.batch_size):
                        stop_ind = np.where(ids_val[b] == 2)[0]
                        if stop_ind.size > 0:
                            stop_ind = stop_ind[0]
                            ids_val[b, stop_ind + 1:] = 0  # zero out everything after the stop token
                    temp[:, :ids_val.shape[1]] = ids_val
                    predictions.append(temp)

                # predictions are decode_sent x b x w_maxlen
                predictions = np.array(predictions)
                in_batches = [w_txt[b, :, :] for b in range(Hp.batch_size)]
                res_batches = [
                    predictions[:, b, :] for b in range(Hp.batch_size)
                ]

                for b in range(Hp.batch_size):
                    in_paragraph = idxword2txt(in_batches[b])
                    print("\n INPUT SAMPLE \n")
                    print(in_paragraph)

                    res_paragraph = idxword2txt(res_batches[b])
                    print("\n RESULTS \n")
                    print(res_paragraph)

                    csv_writer.writerow([
                        " ".join(in_paragraph[:3]), " ".join(in_paragraph[3:]),
                        " ".join(res_paragraph)
                    ])

            csv_f.close()
Example #6
def sg_quasi_rnn(tensor, opt):
    # Split
    if opt.att:
        H, Z, F, O = tf.split(axis=0, num_or_size_splits=4,
                              value=tensor)  # (b, seqlen, hd) for all
    else:
        Z, F, O = tf.split(axis=0, num_or_size_splits=3,
                           value=tensor)  # (b, seqlen, hd) for all

    # step func
    def step(z, f, o, c):
        '''
        Runs fo-pooling at each time step
        '''
        c = f * c + (1 - f) * z

        if opt.att:  # attention
            a = tf.nn.softmax(tf.einsum("ijk,ik->ij", H,
                                        c))  # alpha. (b, seqlen)
            k = (a.sg_expand_dims() * H).sg_sum(
                axis=1)  # attentional sum. (b, hd)
            h = o * (k.sg_dense_gpus(act="linear", name="k%d_%s" % (t, opt.name),
                                     dev=opt.dev, reuse=opt.reuse_vars)
                     + c.sg_dense_gpus(act="linear", name="c%d_%s" % (t, opt.name),
                                       dev=opt.dev, reuse=opt.reuse_vars))
        else:
            h = o * c

        return h, c  # hidden states, (new) cell memories

    # Do rnn loop
    c, hs = 0, []
    timesteps = tensor.get_shape().as_list()[1]
    for t in range(timesteps):
        z = Z[:, t, :]  # (b, hd)
        f = F[:, t, :]  # (b, hd)
        o = O[:, t, :]  # (b, hd)

        # apply step function
        h, c = step(z, f, o, c)  # (b, hd), (b, hd)

        # save result
        hs.append(h.sg_expand_dims(axis=1))

    # Concat to return
    H = tf.concat(hs, 1)  # (b, seqlen, hd)

    if opt.is_enc:
        H_z = tf.tile(
            (h.sg_dense_gpus(act="linear",
                             name="z_%s" % (opt.name),
                             dev=opt.dev,
                             reuse=opt.reuse_vars).sg_expand_dims(axis=1)),
            [1, timesteps, 1])
        H_f = tf.tile(
            (h.sg_dense_gpus(act="linear",
                             name="f_%s" % (opt.name),
                             dev=opt.dev,
                             reuse=opt.reuse_vars).sg_expand_dims(axis=1)),
            [1, timesteps, 1])
        H_o = tf.tile(
            (h.sg_dense_gpus(act="linear",
                             name="o_%s" % (opt.name),
                             dev=opt.dev,
                             reuse=opt.reuse_vars).sg_expand_dims(axis=1)),
            [1, timesteps, 1])
        concatenated = tf.concat(axis=0, values=[H, H_z, H_f,
                                                 H_o])  # (b*4, seqlen, hd)

        return concatenated
    else:
        return H  # (b, seqlen, hd)
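
As a usage note, `opt` here is sugartensor's dictionary-like option object (`tf.sg_opt`). A hypothetical call site, with all option values assumed rather than taken from the project:

# Z, F, O are each (b, seqlen, hd); the function expects them stacked
# along the batch axis (plus H first when att=True)
stacked = tf.concat([Z, F, O], axis=0)  # (b*3, seqlen, hd)
opt = tf.sg_opt(att=False, is_enc=True, name='enc',
                dev='/gpu:0', reuse_vars=False)
out = sg_quasi_rnn(stacked, opt)        # (b*4, seqlen, hd) since is_enc=True
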
Example #7
#
# hyper parameters
#

batch_size = 16  # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence lengths, excluding zero-padding
seq_len = []
for input_ in inputs:
    seq_len.append(tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    # encode audio feature
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
    # CTC loss
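    # NOTE: the listing truncates here. A plausible continuation, sketched after
    # sugartensor's speech examples (assumes opt carries the per-tower target and
    # seq_len splits, matching the module-level `labels` and `seq_len` above):
    return logit.sg_ctc(target=opt.target[opt.gpu_index],
                        seq_len=opt.seq_len[opt.gpu_index])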
Example #8
import sugartensor as tf

__author__ = '*****@*****.**'

# set log level to debug
tf.sg_verbosity(10)

# batch size
batch_size = 128

# MNIST input tensor ( batch size should be adjusted for multiple GPUS )
data = tf.sg_data.Mnist(batch_size=batch_size * tf.sg_gpus())

# split inputs for each GPU tower
inputs = tf.split(data.train.image, tf.sg_gpus(), axis=0)
labels = tf.split(data.train.label, tf.sg_gpus(), axis=0)


# simple wrapping function with decorator for parallel training
@tf.sg_parallel
def get_loss(opt):

    # conv layers
    with tf.sg_context(name='convs', act='relu', bn=True):
        conv = (opt.input[opt.gpu_index]
                .sg_conv(dim=16, name='conv1').sg_pool()
                .sg_conv(dim=32, name='conv2').sg_pool()
                .sg_conv(dim=32, name='conv3').sg_pool())

    # fc layers
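    # NOTE: the listing truncates here. A plausible continuation, sketched after
    # sugartensor's multi-GPU MNIST example (layer sizes are assumptions):
    with tf.sg_context(name='fcs', act='relu', bn=True):
        logit = (conv.sg_flatten()
                     .sg_dense(dim=256, name='fc1')
                     .sg_dense(dim=10, act='linear', bn=False, name='fc2'))

    # cross entropy loss between logit and this tower's label split
    return logit.sg_ce(target=opt.target[opt.gpu_index])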
Example #9
#
# hyper parameters
#

batch_size = 16  # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# mfcc_noise feature of audio
inputs_noise = tf.split(data.mfcc_noise, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence lengths, excluding zero-padding
seq_len = []
for input_ in inputs:
    seq_len.append(
        tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


def penalize_loss(gamma, lambd, tensor, tensor_n):

    # gamma * (tensor - tensor_n)**2 - lambd * (tensor . tensor_n) / (norm(tensor) * norm(tensor_n))
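    # NOTE: the body is truncated in the listing. A sketch consistent with the
    # comment above; reducing over the last axis and taking the batch mean are
    # assumptions, not the project's verified code:
    sq_diff = tf.reduce_sum(tf.square(tensor - tensor_n), axis=-1)
    cos_sim = tf.reduce_sum(tensor * tensor_n, axis=-1) / (
        tf.norm(tensor, axis=-1) * tf.norm(tensor_n, axis=-1) + 1e-8)
    return tf.reduce_mean(gamma * sq_diff - lambd * cos_sim)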