def parse(self, x, context, is_training):
        """Build the graph of an attention-based copy/generate decoder.

        A bidirectional CudnnLSTM encodes ``x``; a second CudnnLSTM that is
        fed only zeros produces one hidden state per output position.
        Additive (Bahdanau-style) attention over the encoder states gives one
        attended vector per output position. From it a gate ``p_gen`` and a
        "generate" score are computed; the result mixes the generate score
        with the log of the attention-weighted input (the "copy" score).

        Args:
            x: input tensor. It is reshaped below to
                (self.bs, self.seq_in, 1, self.n_out), so it is presumably
                (bs, seq_in, n_out) -- TODO confirm against the caller.
            context: not used by this method.
            is_training: not used by this method.

        Returns:
            ``p_copy * copy + p_gen * gen``, shaped (bs, seq_out, n_out).
        """
        with tf.variable_scope(self.scope):
            # Input RNN

            in_rnn = CudnnLSTM(1,
                               128,
                               direction=CUDNN_RNN_BIDIRECTION,
                               name="in_rnn")
            # The layer is fed time-major input, hence the transposes around it.
            h_in, _ = in_rnn(tf.transpose(x, [1, 0, 2]))
            # (bs, seq_in, 1, 256): the last axis is 256 because the reshape
            # below asks for it (128 units, bidirectional layer).
            h_in = tf.reshape(
                tf.transpose(h_in, [1, 0, 2]),
                (self.bs, self.seq_in, 1, 256))

            # Output RNN
            # The decoder input is all zeros, so its states do not depend on x.
            out_input = tf.zeros(
                (self.seq_out, self.bs, 1))  # consider teacher forcing.
            out_rnn = CudnnLSTM(1, 128, name="out_rnn")
            h_out, _ = out_rnn(out_input)
            h_out = tf.reshape(
                tf.transpose(h_out, [1, 0, 2]),
                (self.bs, 1, self.seq_out, 128))  # (bs, 1, seq_out, 128)

            # Bahdanau attention
            # The two projections broadcast against each other over the
            # singleton axes: (bs, 1, seq_out, 128) + (bs, seq_in, 1, 128).
            att = tf.nn.tanh(
                layers.fully_connected(h_out, 128, activation_fn=None) +
                layers.fully_connected(h_in, 128, activation_fn=None))
            att = layers.fully_connected(
                att, 1, activation_fn=None)  # (bs, seq_in, seq_out, 1)
            # Normalise over the input positions (axis 1).
            att = tf.nn.softmax(att, axis=1)  # (bs, seq_in, seq_out, 1)

            # h_in has 256 features, so the attended vector does too.
            attended_h = tf.reduce_sum(att * h_in,
                                       axis=1)  # (bs, seq_out, 256)

            p_gen = layers.fully_connected(
                attended_h, 1, activation_fn=tf.nn.sigmoid)  # (bs, seq_out, 1)
            p_copy = (1 - p_gen)

            # Generate
            gen = layers.fully_connected(
                attended_h, self.n_out,
                activation_fn=None)  # (bs, seq_out, n_out)
            gen = tf.reshape(gen, (self.bs, self.seq_out, self.n_out))

            # Copy
            # Attention-weighted sum of the raw inputs; 1e-8 keeps the log
            # finite when the sum is zero.
            copy = tf.log(
                tf.reduce_sum(
                    att * tf.reshape(x, (self.bs, self.seq_in, 1, self.n_out)),
                    axis=1) + 1e-8)  # (bs, seq_out, n_out)

            # NOTE(review): `copy` is a log value while `gen` is an
            # unnormalised linear output; they are mixed linearly here --
            # confirm this is the intended combination.
            output_logits = p_copy * copy + p_gen * gen
            return output_logits
예제 #2
0
 def _build_rnn(self, name, is_cuda, rnn_dim, inputs, state_dropout_rate,
                output_dropout_rate):
     """Run one bidirectional LSTM layer over ``inputs``, then output dropout.

     Args:
         name: variable scope in which the recurrent layer is created.
         is_cuda: when truthy use ``CudnnLSTM``; otherwise build the same
             layer from ``CudnnCompatibleLSTMCell`` wrapped in
             ``DropoutWrapper``.
         rnn_dim: number of units per direction.
         inputs: input tensor; the non-cuDNN branch passes
             ``time_major=True``, so it is presumably time-major --
             TODO confirm against callers.
         state_dropout_rate: recurrent-state dropout rate, used only by the
             non-cuDNN branch and only while ``self._is_training`` is on.
         output_dropout_rate: dropout rate applied to the layer outputs.

     Returns:
         The layer outputs after dropout.
     """
     with tf.variable_scope(name):
         if not is_cuda:
             # The cast turns the training flag into 0./1., so the keep
             # probability collapses to 1 outside of training.
             training_flag = tf.cast(self._is_training, tf.float32)
             state_keep_prob = 1. - state_dropout_rate * training_flag
             with tf.variable_scope('cudnn_lstm'):

                 def make_cell():
                     return DropoutWrapper(
                         CudnnCompatibleLSTMCell(rnn_dim),
                         state_keep_prob=state_keep_prob,
                         variational_recurrent=True,
                         input_size=inputs.get_shape()[-1],
                         dtype=tf.float32)

                 forward_cells = [make_cell()]
                 backward_cells = [make_cell()]
                 outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                     forward_cells,
                     backward_cells,
                     inputs,
                     time_major=True,
                     dtype=tf.float32)
             outputs = tf.concat(outputs, axis=-1)
         else:
             cudnn_layer = CudnnLSTM(num_layers=1,
                                     num_units=rnn_dim,
                                     direction='bidirectional')
             outputs, _ = cudnn_layer(inputs)
     # A leading 1 in the noise shape shares one dropout mask along axis 0.
     shared_noise_shape = tf.concat([[1], tf.shape(outputs)[1:]], axis=0)
     return tf.layers.dropout(outputs,
                              output_dropout_rate,
                              training=self._is_training,
                              noise_shape=shared_noise_shape)
예제 #3
0
 def check(**kwargs):
     """Check that the canonical weights and biases add up to ``params_size()``.

     Builds a ``CudnnLSTM`` from ``kwargs``, creates a random parameter
     buffer of the size the model reports, splits it into canonical weights
     and biases, and asserts that their element counts sum to that size.
     Uses ``session`` and ``assert_equal`` from the surrounding module.
     """
     print("kwargs:", kwargs)
     model = CudnnLSTM(**kwargs)
     # The buffer size is a tensor, so the variable's static shape is unknown.
     params = tf.Variable(tf.random_uniform([model.params_size()]), validate_shape=False)
     session.run(params.initializer)
     reported_size = model.params_size().eval()
     print("param size:", reported_size)
     weights, biases = model.params_to_canonical(params)
     for label, tensors in (("weight:", weights), ("bias:", biases)):
         for tensor in tensors:
             print(label, tensor, "shape:", tf.shape(tensor).eval())
     summed_size = sum(
         tf.reduce_prod(tf.shape(tensor)).eval() for tensor in weights + biases)
     print("summed up size:", summed_size)
     assert_equal(reported_size, summed_size)
def BiLSTM(x, filters, dropout=0.0, name='BiLSTM', layers=1, return_state=False):
    """Apply a bidirectional CudnnLSTM to one tensor or to a pair of tensors.

    When ``x`` is a pair, both inputs go through the same layer object and
    therefore share weights.

    Args:
        x: a batch-major tensor (it is transposed to time-major before the
            cuDNN call), or a list/tuple of exactly two such tensors.
        filters: number of LSTM units per direction.
        dropout: dropout rate; ``tf.nn.dropout`` is called with
            ``keep_prob = 1 - dropout`` on every call, as there is no
            training flag.
        name: name given to the ``CudnnLSTM`` layer.
        layers: number of stacked LSTM layers.
        return_state: when true, return the first element of the final state
            with its leading axis unstacked and concatenated along the last
            axis, instead of the per-step outputs.

    Returns:
        One tensor for a single input, or a 2-tuple of tensors for a pair.

    Raises:
        AssertionError: if ``x`` is a list/tuple whose length is not 2.
    """
    cudnn_lstm = CudnnLSTM(layers, filters, direction='bidirectional', name=name)

    def _encode(batch_major):
        # cudnn compatibility: time first, batch second
        time_major = tf.transpose(batch_major, [1, 0, 2])
        outputs, state = cudnn_lstm(time_major)  # state[0]: [layers * 2, bs, dim]
        if return_state:
            # Merge the per-layer/per-direction slices into one feature axis.
            result = tf.concat(tf.unstack(state[0], axis=0), axis=-1)
        else:
            result = tf.transpose(outputs, [1, 0, 2])
        return tf.nn.dropout(result, 1 - dropout)

    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses and tuples as the "pair of inputs" form.
    if isinstance(x, (list, tuple)):
        assert len(x) == 2
        x1, x2 = x
        return _encode(x1), _encode(x2)
    return _encode(x)
예제 #5
0
 def __init__(self,
              GPU,
              num_layers,
              num_units,
              dropout=0.,
              dtype=tf.dtypes.float32,
              name=None):
     """Create an LSTM adapter layer.

     The wrapped model is a ``CudnnLSTM`` when ``GPU`` is truthy; otherwise
     it is a ``MultiFusedRNNCell`` holding ``num_layers`` instances of
     ``LSTMBlockFusedCell``.

     Args:
         GPU: selects the cuDNN implementation when truthy.
         num_layers: number of stacked LSTM layers.
         num_units: number of units per layer.
         dropout: stored on ``self.dropout``; it is not forwarded to either
             wrapped model here.
         dtype: dtype passed to the base layer and to the wrapped model.
         name: layer name; the fused cells are named ``'<name>_<index>'``.
     """
     base_layer.Layer.__init__(self, dtype=dtype, name=name)
     self.GPU = GPU
     self.dropout = dropout
     if not GPU:
         fused_cells = []
         for layer_idx in range(num_layers):
             cell_name = '%s_%d' % (name, layer_idx)
             fused_cells.append(
                 LSTMBlockFusedCell(num_units,
                                    dtype=self.dtype,
                                    name=cell_name))
         self.model = MultiFusedRNNCell(fused_cells)
     else:
         self.model = CudnnLSTM(num_layers,
                                num_units,
                                dtype=self.dtype,
                                name=name)
def build_cudnn_lstm(inps, num_layers, num_units):
    """Apply a ``CudnnLSTM`` with ``num_layers`` layers to ``inps``.

    Returns only the layer outputs; the final state is discarded.
    """
    layer = CudnnLSTM(num_layers, num_units, input_mode='linear_input')
    outputs, _final_state = layer(inps)
    return outputs
예제 #7
0
 def get_lstm_outputs2(self, chars, last_state=None, reuse=False):
     """Embed ``chars`` and run the embeddings through a ``CudnnLSTM``.

     Args:
         chars: integer ids looked up in the character embedding table.
         last_state: not used by this method.
         reuse: forwarded to both variable scopes.

     Returns:
         ``(out, (a, b))``, where ``out``, ``a`` and ``b`` are the three
         values returned by the LSTM call, in that order.
     """
     with tf.variable_scope('char_embedding', reuse=reuse):
         table_shape = (self.NUM_CHARS, self.CHAR_EMBEDDING_SIZE)
         # The table is initialised from a concrete orthogonal tensor.
         initial_table = tf.orthogonal_initializer()(table_shape)
         self.char_embedding = tf.get_variable('char_embedding',
                                               initializer=initial_table,
                                               dtype=tf.float32)
         embedded = tf.nn.embedding_lookup(self.char_embedding, chars)
     with tf.variable_scope('spam_gen_rnn', reuse=reuse):
         rnn = CudnnLSTM(self.LAYERS,
                         self.HIDDEN_LAYER_SIZE,
                         self.CHAR_EMBEDDING_SIZE,
                         dropout=0.5)
         # NOTE(review): the positional (None, None, {}) arguments look like
         # an older ops-style CudnnLSTM call signature -- confirm which
         # CudnnLSTM class is imported here.
         outputs, state_a, state_b = rnn(embedded, None, None, {})
     return outputs, (state_a, state_b)
def build_stacked_cudnn_lstm(inps, num_layers, num_units):
    """Chain ``num_layers`` single-layer ``CudnnLSTM`` objects over ``inps``.

    All layers are created first and then applied in order, each consuming
    the previous layer's outputs. Returns the outputs of the last layer, or
    ``inps`` unchanged when ``num_layers`` is 0.
    """
    stack = []
    for _ in range(num_layers):
        stack.append(CudnnLSTM(1, num_units, input_mode='linear_input'))
    hidden = inps
    for layer in stack:
        hidden, _state = layer(hidden)
    return hidden
예제 #9
0
파일: lstm.py 프로젝트: codeaudit/h-elmo
def add_cudnn_lstm(inps, state, num_layers, num_units, input_dim,
                   init_parameter):
    """Apply a ``CudnnLSTM`` with a truncated-normal kernel initialiser.

    Args:
        inps: input tensor passed to the layer.
        state: incoming state, converted by ``prepare_init_state`` before use.
        num_layers: number of stacked LSTM layers.
        num_units: number of units per layer.
        input_dim: input feature size; the larger of this and ``num_units``
            is what gets passed to ``compute_stddevs``.
        init_parameter: forwarded to ``compute_stddevs``.

    Returns:
        ``(output, state)`` as returned by the layer call.
    """
    effective_dim = max(input_dim, num_units)
    stddev = compute_stddevs([num_units], effective_dim, init_parameter)[0]
    kernel_init = tf.truncated_normal_initializer(stddev=stddev)
    layer = CudnnLSTM(num_layers,
                      num_units,
                      input_mode='linear_input',
                      kernel_initializer=kernel_init)
    initial_state = prepare_init_state(state, inps, layer, 'cudnn')
    output, final_state = layer(inps, initial_state=initial_state)
    return output, final_state
예제 #10
0
def cudnn_lstm_module(input, name, train, units, recomp=False):
    """
    Single-layer unidirectional CUDNN LSTM module.

    :param input: input tensor; it is transposed with perm [1, 0, 2] before
        and after the LSTM call
    :param name: name for variable / scope
    :param train: is_train placeholder
    :param units: number of LSTM units
    :param recomp: whether used in recompute_gradient environment
    :return: output tensor after LSTM (the final state is discarded)
    """
    # NOTE(review): this flag is computed but never consumed; the LSTM call
    # below always passes training=True.
    should_learn = tf.logical_and(train, tf.logical_not(recomp))

    class BiasInit:
        """
        Bias initializer that hands out the given constants one per call,
        wrapping around to the first value once the list is exhausted.
        """
        def __init__(self, init):
            self.count = 0
            self.init = init

        def __call__(self, shape, dtype):
            if self.count >= len(self.init):
                self.count = 0
            fill_value = self.init[self.count]
            constant = tf.constant(fill_value, dtype=dtype, shape=shape)
            self.count += 1
            return constant

    # initialize forget gate bias to 1.0
    # according to [An empirical exploration of recurrent network architectures, Jozefowicz et al., ICML'15]
    # https://github.com/keras-team/keras/blob/04cbccc8038c105374eef6eb2ce96d6746999860/keras/layers/cudnn_recurrent.py#L448
    per_bias_values = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    kernel_init = tf.contrib.layers.xavier_initializer(uniform=True)
    bias_init = BiasInit(per_bias_values)
    lstm = CudnnLSTM(
        num_layers=1,
        dtype=tf.float32,
        num_units=units,
        direction='unidirectional',
        name=name,
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
    )
    # lstm swaps batch and time dimension
    swapped = tf.transpose(input, perm=[1, 0, 2])
    swapped, _final_state = lstm(swapped, training=True)
    # swap back lstm batch and time dimension
    return tf.transpose(swapped, perm=[1, 0, 2])
예제 #11
0
파일: lstm.py 프로젝트: codeaudit/h-elmo
def add_stacked_cudnn_lstm(inps, state, num_units, input_dim, init_parameter):
    """Apply a stack of single-layer ``CudnnLSTM`` objects, one per entry of ``num_units``.

    Args:
        inps: input tensor fed to the first layer.
        state: incoming state, converted by ``prepare_init_state`` into one
            initial state per layer.
        num_units: iterable with the number of units for each layer.
        input_dim: forwarded to ``compute_stddevs``.
        init_parameter: forwarded to ``compute_stddevs``.

    Returns:
        ``(output, new_state)``: the outputs of the last layer and a list
        with the final state produced by each layer, in layer order.
    """
    stddevs = compute_stddevs(num_units, input_dim, init_parameter)
    lstms = [
        CudnnLSTM(
            1,
            nu,
            input_mode='linear_input',
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev))
        for nu, stddev in zip(num_units, stddevs)
    ]
    state = prepare_init_state(state, inps, lstms, 'cudnn_stacked')
    inter = inps
    new_state = []
    for lstm, s in zip(lstms, state):
        inter, new_s = lstm(inter, initial_state=s)
        # Collect the state the layer just produced. Appending the initial
        # state `s` here would hand the caller back its own input.
        new_state.append(new_s)
    return inter, new_state
예제 #12
0
 def unrolled_rnn(self, inputs, lengths):
     """Run the recurrent network over ``inputs`` and return ``(logits, state)``.

     With ``self.use_cudnn_rnn`` set, a ``CudnnLSTM`` is applied to the
     transposed inputs, its outputs are projected to ``self.vocab_size`` and
     the logits are multiplied by the cost mask built from ``lengths``.
     Otherwise ``tf.nn.dynamic_rnn`` is run on ``self.cell()`` with
     ``sequence_length=lengths`` and its outputs are returned as the logits.
     """
     if self.use_cudnn_rnn:
         cudnn_layer = CudnnLSTM(self.rnn_num_layers, self.rnn_num_units)
         from layers_utils import AffineProjectionLayer
         projection = AffineProjectionLayer(self.rnn_num_units,
                                            self.vocab_size,
                                            self.FLOAT_TYPE)
         # The cuDNN layer is fed with the first two axes swapped; its
         # outputs are swapped back before the projection.
         swapped_inputs = tf.transpose(inputs, (1, 0, 2))
         hidden, state = cudnn_layer(swapped_inputs)
         hidden = tf.transpose(hidden, (1, 0, 2))
         logits = projection(hidden)
         mask = self.cost_mask(lengths, self.max_length(), False)
         logits = logits * tf.expand_dims(mask, -1)
         return logits, state

     rnn_cell = self.cell()
     logits, state = tf.nn.dynamic_rnn(
         rnn_cell,
         inputs,
         sequence_length=lengths,
         dtype=self.FLOAT_TYPE,
         time_major=self.time_major_optimization,
         swap_memory=self.dynamic_rnn_swap_memory)
     return logits, state
예제 #13
0
def get_cell(cell_type, size, layers=1, direction='unidirectional'):
    """Create the recurrent cell or layer named by ``cell_type``.

    Args:
        cell_type: one of "layer_norm_basic", "lstm_block_fused",
            "cudnn_lstm", "cudnn_gru", "lstm_block", "gru_block", "rnn" or
            "cudnn_rnn". Any other value yields a ``BasicLSTMCell``.
        size: number of units.
        layers: number of layers; only the cuDNN variants use it.
        direction: only "cudnn_lstm" and "cudnn_gru" use it.

    Returns:
        The newly constructed cell or layer object.
    """
    if cell_type == "layer_norm_basic":
        return LayerNormBasicLSTMCell(size)
    if cell_type == "lstm_block_fused":
        return tf.contrib.rnn.LSTMBlockFusedCell(size)
    if cell_type == "cudnn_lstm":
        return CudnnLSTM(layers, size, direction=direction)
    if cell_type == "cudnn_gru":
        return CudnnGRU(layers, size, direction=direction)
    if cell_type == "lstm_block":
        return LSTMBlockCell(size)
    if cell_type == "gru_block":
        return GRUBlockCell(size)
    if cell_type == "rnn":
        return BasicRNNCell(size)
    if cell_type == "cudnn_rnn":
        return CudnnRNNTanh(layers, size)
    # Unrecognised names fall back to a plain LSTM cell.
    return BasicLSTMCell(size)
예제 #14
0
 def build(self, input_shape):
     """Create the per-layer cells and auxiliary variables.

     For every entry of ``self.rnn_layers`` this builds an LSTM cell and
     appends a dict to ``self.weights`` holding the 'cell' and, when they
     apply, 'h_var_backup' (weight-drop backup) and 'w_proj' / 'b_proj'
     (output projection). ``input_shape`` is updated after each layer so the
     next one is built against the previous layer's output size.
     """
     with tf.variable_scope(self.name, reuse=self.reuse):
         self.weights = []
         for layer_idx, spec in enumerate(self.rnn_layers):
             if not self.is_cpu:
                 cell = CudnnLSTM(num_layers=1,
                                  num_units=spec['units'],
                                  input_mode='linear_input',
                                  direction='unidirectional',
                                  dropout=0.0)
                 cell.build(input_shape)
             else:
                 self.is_training = False  # Only use cpu in inference mode for now
                 cell = CudnnCompatibleLSTMCell(num_units=spec['units'])
                 # Require 2 dimension only
                 cell.build(tf.TensorShape(input_shape[1:]))
             units = spec['units']
             entry = {'cell': cell}

             wdrop = spec.get('wdrop', 0.0)
             if self.is_training and wdrop > 0.0:
                 # Non-trainable buffer shaped [4 * units, units].
                 entry['h_var_backup'] = tf.Variable(
                     initial_value=tf.zeros(shape=[4 * units, units]),
                     trainable=False,
                     name='h_var_backup_' + str(layer_idx))

             use_projection = (isinstance(self.projection_dims, int)
                               and self.projection_dims > 0)
             if use_projection:
                 entry['w_proj'] = tf.get_variable(
                     name='w_proj_{}'.format(layer_idx),
                     shape=(units, self.projection_dims),
                     initializer=tf.glorot_uniform_initializer())
                 entry['b_proj'] = tf.get_variable(
                     name='b_proj_{}'.format(layer_idx),
                     shape=(self.projection_dims, ),
                     initializer=tf.zeros_initializer())
                 input_shape = (None, None, self.projection_dims)
             else:
                 input_shape = (None, None, units)
             self.weights.append(entry)
예제 #15
0
import os
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn import CudnnLSTM as CudnnLSTM
# Build two CudnnLSTM layers against a dummy [10, 32, 100] input, initialise
# their variables and write a checkpoint under the prefix 'test_cudnn_lstm_save/1'.
inp = tf.zeros([10, 32, 100])
lstm1, lstm2 = CudnnLSTM(1, 128), CudnnLSTM(2, 256)
lstm1.build(inp.shape)
lstm2.build(inp.shape)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    save_path = 'test_cudnn_lstm_save/1'
    # The directory created here has the same path as the checkpoint prefix.
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    saver.save(sess, save_path)
예제 #16
0
            def __build_uni_model(inputs, name):
                """Build one character-CNN + stacked CudnnLSTM model graph.

                Configuration and the shared decoder weights are read from
                ``self`` in the enclosing scope (not visible in this block).

                Args:
                    inputs: integer id tensor; per the reshape comment below
                        it is treated as [T, B, C] (presumably time, batch,
                        characters per token -- TODO confirm with caller).
                    name: variable scope name for this model.

                Returns:
                    dict with keys 'layer_outputs' (list with the outputs of
                    every RNN layer), 'rnn_outputs' (last layer outputs
                    multiplied by ``self.seq_masks``) and 'decoder' (the
                    projection reshaped to [T, B, self.vocab_size]).
                """
                model = {}
                with tf.variable_scope(name, reuse=self.reuse):
                    s = tf.shape(inputs)  # Get input shape
                    # Reshape from [T, B, C] to [T * B, C]
                    inputs = tf.reshape(inputs, [s[0] * s[1], s[2]])
                    # The embedding table and the lookup are placed on the CPU.
                    with tf.device('/cpu:0'):
                        W = tf.get_variable(
                            shape=[self.char_vocab_size, self.char_vec_size],
                            initializer=tf.glorot_uniform_initializer(),
                            name="embedding_weight")
                        if self.is_training and self.drop_e > 0.0:
                            W = embedding_dropout(W, dropout=self.drop_e)
                        char_embed = tf.nn.embedding_lookup(W, inputs)
                    # One conv1d per (kernel size, filter count) pair, each
                    # followed by a max over axis 1.
                    conv_out = []
                    for fsz, num in self.char_cnn_layers:
                        x = tf.layers.conv1d(
                            char_embed,
                            num,
                            fsz,
                            activation=tf.nn.relu,
                            kernel_initializer=tf.glorot_uniform_initializer(),
                            padding='same')
                        x = tf.reduce_max(x, axis=1)
                        conv_out.append(x)
                    embedding = tf.concat(conv_out, axis=-1)
                    # Back to [T, B, total number of conv filters].
                    embedding = tf.reshape(
                        embedding,
                        (s[0], s[1], sum(x for _, x in self.char_cnn_layers)))
                    input_shape = s
                    ops = []
                    inputs = embedding
                    layer_outputs = []
                    for idx, l in enumerate(self.rnn_layers):
                        cell = CudnnLSTM(num_layers=1,
                                         num_units=l['units'],
                                         input_mode='linear_input',
                                         direction='unidirectional',
                                         dropout=0.0)
                        # Non-trainable variables that carry the LSTM state
                        # from one run to the next.
                        # NOTE(review): CudnnLSTM documents its state tuple as
                        # (h, c), while slot 0 here is named 'c_*' -- the
                        # variable names may be swapped relative to their
                        # contents; verify before relying on the names.
                        saved_state = (tf.get_variable(
                            shape=[1, 1, l['units']],
                            name='c_' + str(idx),
                            trainable=False),
                                       tf.get_variable(
                                           shape=[1, 1, l['units']],
                                           name='h_' + str(idx),
                                           trainable=False))
                        for x in saved_state:
                            tf.add_to_collection(LSTM_SAVED_STATE, x)
                        zeros = tf.zeros([1, input_shape[1], l['units']],
                                         dtype=tf.float32)
                        zero_state = (zeros, zeros)

                        # Branches for the tf.cond on self.reset_state below:
                        # start from zeros, or continue from the saved state.
                        def if_true():
                            return zero_state

                        def if_false():
                            return saved_state

                        drop_i = l.get('drop_i', 0.0)
                        if self.is_training and drop_i > 0.0:
                            # The leading 1 in noise_shape shares one dropout
                            # mask along the first axis.
                            inputs = tf.nn.dropout(x=inputs,
                                                   keep_prob=1 - drop_i,
                                                   noise_shape=[
                                                       1, input_shape[1],
                                                       inputs.shape[-1]
                                                   ],
                                                   name='drop_i_' + str(idx))
                        cell.build(inputs.shape)
                        wdrop = l.get('wdrop', 0.0)
                        if self.is_training and wdrop > 0.0:
                            # Weight drop on a slice of the cell's first
                            # variable.
                            cell_var = cell.variables[0]
                            h_var_backup = tf.Variable(initial_value=tf.zeros(
                                shape=[4 * l['units'], l['units']]),
                                                       trainable=False,
                                                       name='h_var_backup_' +
                                                       str(idx))
                            # Slice between the first input_dim * units * 4
                            # entries and the last units * 8 entries --
                            # presumably the hidden-to-hidden weights of the
                            # flat cuDNN parameter buffer (TODO confirm).
                            h_var = cell_var[inputs.shape[-1] * l['units'] *
                                             4:-l['units'] * 8]
                            # Adding the backup puts back the values that were
                            # zeroed by the previous mask.
                            h_var = tf.reshape(
                                h_var,
                                [4 * l['units'], l['units']]) + h_var_backup
                            keep_prob = 1 - wdrop
                            random_tensor = keep_prob
                            # The mask has one entry per row, so whole rows
                            # are kept or dropped together.
                            random_tensor += tf.random_uniform(
                                [4 * l['units'], 1], dtype=h_var.dtype)
                            # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
                            binary_tensor = tf.floor(random_tensor)
                            new_h_var = tf.multiply(h_var, binary_tensor)
                            new_h_var = tf.reshape(
                                new_h_var, [4 * l['units'] * l['units']])
                            # Remember the values that are being dropped now.
                            h_var_backup = tf.assign(
                                h_var_backup,
                                tf.multiply(h_var,
                                            tf.subtract(1.0, binary_tensor)),
                                validate_shape=True,
                                use_locking=True,
                                name='assign_h_var_backup_' + str(idx))
                            # Reassemble the flat buffer around the masked
                            # slice and write it back into the cell variable.
                            new_cell_var = tf.concat([
                                cell_var[:inputs.shape[-1] * l['units'] * 4],
                                new_h_var, cell_var[-l['units'] * 8:]
                            ],
                                                     axis=0,
                                                     name='new_cell_var_' +
                                                     str(idx))
                            op = tf.assign(cell_var,
                                           new_cell_var,
                                           validate_shape=True,
                                           use_locking=True,
                                           name='assign_new_cell_var_' +
                                           str(idx))
                            # The LSTM call is created under a control
                            # dependency on both assignments.
                            with tf.control_dependencies([op, h_var_backup]):
                                outputs, state = cell.call(
                                    inputs=inputs,
                                    initial_state=tf.cond(
                                        self.reset_state, if_true, if_false),
                                    training=self.is_training)
                        else:
                            outputs, state = cell.call(
                                inputs=inputs,
                                initial_state=tf.cond(self.reset_state,
                                                      if_true, if_false),
                                training=self.is_training)
                        if isinstance(self.fine_tune_lr, list):
                            outputs = apply_custom_lr(outputs,
                                                      self.fine_tune_lr[idx])
                        drop_o = l.get('drop_o', 0.0)
                        if self.is_training and drop_o > 0.0:
                            outputs = tf.nn.dropout(x=outputs,
                                                    keep_prob=1 - drop_o,
                                                    noise_shape=[
                                                        1, input_shape[1],
                                                        outputs.shape[-1]
                                                    ],
                                                    name='drop_o_' + str(idx))
                        # Store the final state for the next run. The saved
                        # variables are declared as [1, 1, units] while the
                        # state has input_shape[1] in its second axis, hence
                        # validate_shape=False.
                        ops.append(
                            tf.assign(saved_state[0],
                                      state[0],
                                      validate_shape=False))
                        ops.append(
                            tf.assign(saved_state[1],
                                      state[1],
                                      validate_shape=False))
                        inputs = outputs
                        layer_outputs.append(outputs)
                    model['layer_outputs'] = layer_outputs
                    ops = tf.group(ops)
                    # 'rnn_outputs' is created under a control dependency on
                    # the grouped state assignments.
                    with tf.control_dependencies([ops]):
                        rnn_outputs = tf.multiply(inputs,
                                                  tf.expand_dims(
                                                      self.seq_masks, axis=-1),
                                                  name='rnn_outputs')
                    model['rnn_outputs'] = rnn_outputs
                    # Project the flattened [T * B, units] outputs with the
                    # shared decoder weights, then restore [T, B, vocab_size].
                    decoder = tf.nn.xw_plus_b(
                        tf.reshape(rnn_outputs, [
                            input_shape[0] * input_shape[1],
                            self.rnn_layers[-1]['units']
                        ]), self.share_decode_W, self.share_decode_b)
                    decoder = tf.reshape(
                        decoder,
                        [input_shape[0], input_shape[1], self.vocab_size])
                    model['decoder'] = decoder
                    return model
예제 #17
0
def BiLSTM(x, filters, dropout=0.0, name='BiLSTM'):
    """Apply a single-layer bidirectional ``CudnnLSTM`` to ``x``, then dropout.

    ``x`` is passed to the layer as-is (no transpose is done here). The
    final state is discarded, and ``tf.nn.dropout`` is called with
    ``keep_prob = 1 - dropout``.
    """
    layer = CudnnLSTM(1, filters, direction='bidirectional', name=name)
    outputs, _final_state = layer(x)
    keep_prob = 1 - dropout
    return tf.nn.dropout(outputs, keep_prob)