예제 #1
0
            def __build_uni_model(inputs, name):
                """Build a character-CNN + stacked unidirectional CudnnLSTM graph.

                All ops and variables are created under ``tf.variable_scope(name)``.
                The function is a closure: besides its arguments it reads
                configuration from the enclosing ``self`` (``reuse``,
                ``char_vocab_size``, ``char_vec_size``, ``char_cnn_layers``,
                ``rnn_layers``, ``is_training``, ``drop_e``, ``reset_state``,
                ``fine_tune_lr``, ``seq_masks``, ``share_decode_W``,
                ``share_decode_b``, ``vocab_size``).

                Args:
                    inputs: rank-3 tensor of character ids used for the embedding
                        lookup; the inline comment below labels its axes
                        ``[T, B, C]`` (presumably time, batch, characters per
                        token -- TODO confirm against the caller).
                    name: variable-scope name for this model.

                Returns:
                    dict with three entries:
                    ``'layer_outputs'`` -- list with the output tensor of every
                    RNN layer (after the optional custom-lr wrapper and output
                    dropout);
                    ``'rnn_outputs'`` -- last layer output multiplied by
                    ``self.seq_masks``;
                    ``'decoder'`` -- ``rnn_outputs`` projected with
                    ``self.share_decode_W`` / ``self.share_decode_b`` and
                    reshaped to ``[T, B, self.vocab_size]``.
                """
                model = {}
                with tf.variable_scope(name, reuse=self.reuse):
                    s = tf.shape(inputs)  # Dynamic input shape, reused for every reshape below
                    # Reshape from [T, B, C] to [T * B, C]
                    inputs = tf.reshape(inputs, [s[0] * s[1], s[2]])
                    # The character embedding table and its lookup are pinned to the CPU.
                    with tf.device('/cpu:0'):
                        W = tf.get_variable(
                            shape=[self.char_vocab_size, self.char_vec_size],
                            initializer=tf.glorot_uniform_initializer(),
                            name="embedding_weight")
                        # Dropout is applied to the embedding matrix itself, and
                        # only while training.
                        if self.is_training and self.drop_e > 0.0:
                            W = embedding_dropout(W, dropout=self.drop_e)
                        char_embed = tf.nn.embedding_lookup(W, inputs)
                    # One conv1d per (filter size, filter count) pair, each followed
                    # by a max over axis 1 of the conv output; the per-filter
                    # results are concatenated on the last axis.
                    conv_out = []
                    for fsz, num in self.char_cnn_layers:
                        x = tf.layers.conv1d(
                            char_embed,
                            num,
                            fsz,
                            activation=tf.nn.relu,
                            kernel_initializer=tf.glorot_uniform_initializer(),
                            padding='same')
                        x = tf.reduce_max(x, axis=1)
                        conv_out.append(x)
                    embedding = tf.concat(conv_out, axis=-1)
                    # Restore the two leading axes; the feature size is the sum of
                    # all filter counts.
                    embedding = tf.reshape(
                        embedding,
                        (s[0], s[1], sum(x for _, x in self.char_cnn_layers)))
                    input_shape = s
                    ops = []  # state-saving assign ops, grouped after the loop
                    inputs = embedding
                    layer_outputs = []
                    for idx, l in enumerate(self.rnn_layers):
                        # Each entry of self.rnn_layers is a dict with a required
                        # 'units' key and optional 'drop_i', 'wdrop', 'drop_o' rates.
                        cell = CudnnLSTM(num_layers=1,
                                         num_units=l['units'],
                                         input_mode='linear_input',
                                         direction='unidirectional',
                                         dropout=0.0)
                        # Non-trainable variables ('c_<idx>', 'h_<idx>') that carry
                        # the LSTM state from one run to the next. They are
                        # registered in the LSTM_SAVED_STATE collection.
                        saved_state = (tf.get_variable(
                            shape=[1, 1, l['units']],
                            name='c_' + str(idx),
                            trainable=False),
                                       tf.get_variable(
                                           shape=[1, 1, l['units']],
                                           name='h_' + str(idx),
                                           trainable=False))
                        for x in saved_state:
                            tf.add_to_collection(LSTM_SAVED_STATE, x)
                        # Fresh all-zero state sized with the second input dimension.
                        zeros = tf.zeros([1, input_shape[1], l['units']],
                                         dtype=tf.float32)
                        zero_state = (zeros, zeros)

                        # Branch callables for tf.cond on self.reset_state: start
                        # from zeros when resetting, otherwise resume from the
                        # saved state. They are consumed within this same loop
                        # iteration, so they see this layer's states.
                        def if_true():
                            return zero_state

                        def if_false():
                            return saved_state

                        # Input dropout (training only). noise_shape has size 1 on
                        # axis 0, so one mask is shared along that axis.
                        drop_i = l.get('drop_i', 0.0)
                        if self.is_training and drop_i > 0.0:
                            inputs = tf.nn.dropout(x=inputs,
                                                   keep_prob=1 - drop_i,
                                                   noise_shape=[
                                                       1, input_shape[1],
                                                       inputs.shape[-1]
                                                   ],
                                                   name='drop_i_' + str(idx))
                        # Build explicitly so cell.variables is populated before
                        # the weight-dropout code below reads it.
                        cell.build(inputs.shape)
                        wdrop = l.get('wdrop', 0.0)
                        if self.is_training and wdrop > 0.0:
                            # Weight dropout on a slice of the cell's first variable.
                            # cell_var is indexed as a flat buffer: the first
                            # in_dim * units * 4 entries and the last units * 8
                            # entries are left untouched, and the span in between is
                            # viewed as a [4 * units, units] matrix and masked.
                            # NOTE(review): presumably this is the cuDNN opaque
                            # parameter layout (input weights, recurrent weights,
                            # biases) -- TODO confirm.
                            cell_var = cell.variables[0]
                            # Holds the values zeroed out by the previous mask so
                            # they can be added back before a new mask is drawn.
                            h_var_backup = tf.Variable(initial_value=tf.zeros(
                                shape=[4 * l['units'], l['units']]),
                                                       trainable=False,
                                                       name='h_var_backup_' +
                                                       str(idx))
                            h_var = cell_var[inputs.shape[-1] * l['units'] *
                                             4:-l['units'] * 8]
                            # Restore the previously dropped entries.
                            h_var = tf.reshape(
                                h_var,
                                [4 * l['units'], l['units']]) + h_var_backup
                            keep_prob = 1 - wdrop
                            # Mask has shape [4 * units, 1]: whole rows of the
                            # matrix are kept or dropped together.
                            random_tensor = keep_prob
                            random_tensor += tf.random_uniform(
                                [4 * l['units'], 1], dtype=h_var.dtype)
                            # floor() yields 0. for values in [keep_prob, 1.0) and
                            # 1. for values in [1.0, 1.0 + keep_prob)
                            binary_tensor = tf.floor(random_tensor)
                            # NOTE(review): kept rows are not rescaled by
                            # 1 / keep_prob here -- confirm this is intended.
                            new_h_var = tf.multiply(h_var, binary_tensor)
                            new_h_var = tf.reshape(
                                new_h_var, [4 * l['units'] * l['units']])
                            # Remember exactly the entries that were just zeroed.
                            h_var_backup = tf.assign(
                                h_var_backup,
                                tf.multiply(h_var,
                                            tf.subtract(1.0, binary_tensor)),
                                validate_shape=True,
                                use_locking=True,
                                name='assign_h_var_backup_' + str(idx))
                            # Re-assemble the flat buffer with the masked slice and
                            # write it back into the cell variable in place.
                            new_cell_var = tf.concat([
                                cell_var[:inputs.shape[-1] * l['units'] * 4],
                                new_h_var, cell_var[-l['units'] * 8:]
                            ],
                                                     axis=0,
                                                     name='new_cell_var_' +
                                                     str(idx))
                            op = tf.assign(cell_var,
                                           new_cell_var,
                                           validate_shape=True,
                                           use_locking=True,
                                           name='assign_new_cell_var_' +
                                           str(idx))
                            # The cell must run only after both the masked weights
                            # and the backup have been written.
                            with tf.control_dependencies([op, h_var_backup]):
                                outputs, state = cell.call(
                                    inputs=inputs,
                                    initial_state=tf.cond(
                                        self.reset_state, if_true, if_false),
                                    training=self.is_training)
                        else:
                            # No weight dropout: run the cell directly.
                            outputs, state = cell.call(
                                inputs=inputs,
                                initial_state=tf.cond(self.reset_state,
                                                      if_true, if_false),
                                training=self.is_training)
                        # When fine_tune_lr is a list, this layer's outputs are
                        # passed through apply_custom_lr with the layer's entry.
                        if isinstance(self.fine_tune_lr, list):
                            outputs = apply_custom_lr(outputs,
                                                      self.fine_tune_lr[idx])
                        # Output dropout (training only), mask shared along axis 0
                        # in the same way as the input dropout above.
                        drop_o = l.get('drop_o', 0.0)
                        if self.is_training and drop_o > 0.0:
                            outputs = tf.nn.dropout(x=outputs,
                                                    keep_prob=1 - drop_o,
                                                    noise_shape=[
                                                        1, input_shape[1],
                                                        outputs.shape[-1]
                                                    ],
                                                    name='drop_o_' + str(idx))
                        # Persist the final state for the next run. Shape validation
                        # is disabled because the variables were declared as
                        # [1, 1, units] while the state follows the input's size.
                        ops.append(
                            tf.assign(saved_state[0],
                                      state[0],
                                      validate_shape=False))
                        ops.append(
                            tf.assign(saved_state[1],
                                      state[1],
                                      validate_shape=False))
                        # This layer's output feeds the next layer.
                        inputs = outputs
                        layer_outputs.append(outputs)
                    model['layer_outputs'] = layer_outputs
                    # Tie the state-saving assigns to the final output so that
                    # evaluating rnn_outputs also stores the LSTM states.
                    ops = tf.group(ops)
                    with tf.control_dependencies([ops]):
                        # Multiply by the sequence mask, broadcast over the feature axis.
                        rnn_outputs = tf.multiply(inputs,
                                                  tf.expand_dims(
                                                      self.seq_masks, axis=-1),
                                                  name='rnn_outputs')
                    model['rnn_outputs'] = rnn_outputs
                    # Project with the shared decoder weights: flatten the two
                    # leading axes, apply x @ W + b, then restore them.
                    decoder = tf.nn.xw_plus_b(
                        tf.reshape(rnn_outputs, [
                            input_shape[0] * input_shape[1],
                            self.rnn_layers[-1]['units']
                        ]), self.share_decode_W, self.share_decode_b)
                    decoder = tf.reshape(
                        decoder,
                        [input_shape[0], input_shape[1], self.vocab_size])
                    model['decoder'] = decoder
                    return model
예제 #2
0
import os
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM as CudnnLSTM
# Smoke test: create the variables of two CudnnLSTM layers of different
# depth/width from one dummy input shape, then save them to a checkpoint.
# NOTE(review): the dummy shape is presumably [time, batch, features] -- confirm.
inp = tf.zeros(shape=[10, 32, 100])
lstm1 = CudnnLSTM(num_layers=1, num_units=128)
lstm2 = CudnnLSTM(num_layers=2, num_units=256)
# build() is called explicitly so the layers' variables exist before the
# Saver is constructed.
for lstm in (lstm1, lstm2):
    lstm.build(inp.shape)
saver = tf.train.Saver()
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    save_path = 'test_cudnn_lstm_save/1'
    # Make sure the target path exists on disk before saving.
    path_missing = not os.path.exists(save_path)
    if path_missing:
        os.makedirs(save_path)
    saver.save(sess=sess, save_path=save_path)