Code example #1
File: cvae.py  Project: mtian95/NeuralDialog-CVAE
    def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
        # thanks for this solution from @dimeldo
        cells = []
        for _ in range(num_layer):
            if cell_type == "gru":
                cell = rnn_cell.GRUCell(cell_size)
            else:
                cell = rnn_cell.LSTMCell(cell_size,
                                         use_peepholes=False,
                                         forget_bias=1.0)

            if keep_prob < 1.0:
                cell = rnn_cell.DropoutWrapper(cell,
                                               output_keep_prob=keep_prob)

            cells.append(cell)

        if num_layer > 1:
            cell = rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
        else:
            cell = cells[0]

        return cell
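
For orientation, a minimal usage sketch (not from the original project): it assumes TensorFlow 1.x, that rnn_cell aliases tf.contrib.rnn, and treats get_rnncell as a standalone helper.

import tensorflow as tf
from tensorflow.contrib import rnn as rnn_cell

# Two stacked GRU layers of 128 units, with dropout applied to each
# layer's output (keep_prob < 1.0 triggers the DropoutWrapper above).
cell = get_rnncell("gru", cell_size=128, keep_prob=0.8, num_layer=2)
inputs = tf.placeholder(tf.float32, [None, 20, 64])  # [batch, time, features]
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)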
Code example #2
def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None):
  """Builds a canonical (non-cuDNN) RNN graph that mirrors a CudnnRNN layer."""
  mode = rnn.rnn_mode
  num_units = rnn.num_units
  num_layers = rnn.num_layers

  # To reuse cuDNN-trained weights, the canonical graph must be built from
  # cuDNN-compatible RNN cells.
  if mode == CUDNN_LSTM:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units)
  elif mode == CUDNN_GRU:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units)
  elif mode == CUDNN_RNN_TANH:
    single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh))
  elif mode == CUDNN_RNN_RELU:
    single_cell = (
        lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu))
  else:
    raise ValueError("%s is not supported!" % mode)

  if not is_bidi:
    cell = rnn_cell_impl.MultiRNNCell(
        [single_cell() for _ in range(num_layers)])
    return rnn_lib.dynamic_rnn(
        cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope)
  else:
    cells_fw = [single_cell() for _ in range(num_layers)]
    cells_bw = [single_cell() for _ in range(num_layers)]

    (outputs, output_state_fw,
     output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn(
         cells_fw,
         cells_bw,
         inputs,
         dtype=dtypes.float32,
         time_major=True,
         scope=scope)
    return outputs, (output_state_fw, output_state_bw)
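
An illustrative call site (not part of the TensorFlow test this snippet comes from; it assumes TF 1.x with tf.contrib.cudnn_rnn available, and the shapes are made up):

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn

# A 2-layer cuDNN LSTM; its rnn_mode is "lstm", i.e. CUDNN_LSTM above.
lstm = cudnn_rnn.CudnnLSTM(num_layers=2, num_units=128)
inputs = tf.zeros([20, 8, 64])  # time-major: [time, batch, input_dim]
outputs, state = _CreateCudnnCompatibleCanonicalRNN(lstm, inputs)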
Code example #3
 def __init__(self, name=None):
   super(KerasNetworkTFRNNs, self).__init__(name=name)
   self._cell = rnn_cell_impl.MultiRNNCell(
       [rnn_cell_impl.LSTMCell(1) for _ in range(2)])
Code example #4
 def testBasicLSTMCell(self):
     for dtype in [dtypes.float16, dtypes.float32]:
         np_dtype = dtype.as_numpy_dtype
         with self.test_session(graph=ops.Graph()) as sess:
             with variable_scope.variable_scope(
                     "root",
                     initializer=init_ops.constant_initializer(0.5)):
                 x = array_ops.zeros([1, 2], dtype=dtype)
                 m = array_ops.zeros([1, 8], dtype=dtype)
                 cell = rnn_cell_impl.MultiRNNCell([
                     rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
                     for _ in range(2)
                 ],
                                                   state_is_tuple=False)
                 self.assertEqual(cell.dtype, None)
                 g, out_m = cell(x, m)
                 # Layer infers the input type.
                 self.assertEqual(cell.dtype, dtype.name)
                 expected_variable_names = [
                     "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
                     rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                     "root/multi_rnn_cell/cell_0/basic_lstm_cell/%s:0" %
                     rnn_cell_impl._BIAS_VARIABLE_NAME,
                     "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
                     rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
                     "root/multi_rnn_cell/cell_1/basic_lstm_cell/%s:0" %
                     rnn_cell_impl._BIAS_VARIABLE_NAME
                 ]
                 self.assertEqual(
                     expected_variable_names,
                     [v.name for v in cell.trainable_variables])
                 self.assertFalse(cell.non_trainable_variables)
                 sess.run([variables_lib.global_variables_initializer()])
                 res = sess.run(
                     [g, out_m], {
                         x.name: np.array([[1., 1.]]),
                         m.name: 0.1 * np.ones([1, 8])
                     })
                 self.assertEqual(len(res), 2)
                 variables = variables_lib.global_variables()
                 self.assertEqual(expected_variable_names,
                                  [v.name for v in variables])
                  # The numbers in the results were not hand-calculated;
                  # this is just a smoke test.
                 self.assertAllClose(
                     res[0], np.array([[0.240, 0.240]], dtype=np_dtype),
                     1e-2)
                 expected_mem = np.array([[
                     0.689, 0.689, 0.448, 0.448, 0.398, 0.398, 0.240, 0.240
                 ]],
                                         dtype=np_dtype)
                 self.assertAllClose(res[1], expected_mem, 1e-2)
             with variable_scope.variable_scope(
                     "other",
                     initializer=init_ops.constant_initializer(0.5)):
                 # Test BasicLSTMCell with input_size != num_units.
                 x = array_ops.zeros([1, 3], dtype=dtype)
                 m = array_ops.zeros([1, 4], dtype=dtype)
                 g, out_m = rnn_cell_impl.BasicLSTMCell(
                     2, state_is_tuple=False)(x, m)
                 sess.run([variables_lib.global_variables_initializer()])
                 res = sess.run(
                     [g, out_m], {
                         x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                         m.name: 0.1 * np.ones([1, 4], dtype=np_dtype)
                     })
                 self.assertEqual(len(res), 2)
Code example #5
def _CreateStackedLstmCell(*cell_sizes):
    subcells = [rnn_cell_impl.LSTMCell(cell_size) for cell_size in cell_sizes]
    return rnn_cell_impl.MultiRNNCell(subcells)
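
A quick usage note (illustrative, not from the original source): layer sizes are passed positionally, one per stacked layer.

# Two stacked LSTM layers: 32 units feeding into 64 units.
cell = _CreateStackedLstmCell(32, 64)
# With the default state_is_tuple=True, cell.state_size is one
# LSTMStateTuple per layer: ((32, 32), (64, 64)).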
Code example #6
File: seq2seq_model.py  Project: kithan/Attention-OCR
    def __init__(self, encoder_masks, encoder_inputs_tensor, 
            decoder_inputs,
            target_weights,
            target_vocab_size, 
            buckets,
            target_embedding_size,
            attn_num_layers,
            attn_num_hidden,
            forward_only,
            use_gru):
        """Create the model.

        Args:
          source_vocab_size: size of the source vocabulary.
          target_vocab_size: size of the target vocabulary.
          buckets: a list of pairs (I, O), where I specifies maximum input length
            that will be processed in that bucket, and O specifies maximum output
            length. Training instances that have inputs longer than I or outputs
            longer than O will be pushed to the next bucket and padded accordingly.
            We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
          size: number of units in each layer of the model.
          num_layers: number of layers in the model.
          max_gradient_norm: gradients will be clipped to maximally this norm.
          learning_rate: learning rate to start with.
          learning_rate_decay_factor: decay learning rate by this much when needed.
          use_lstm: if true, we use LSTM cells instead of GRU cells.
          num_samples: number of samples for sampled softmax.
          forward_only: if set, we do not construct the backward pass in the model.
        """
        self.encoder_inputs_tensor = encoder_inputs_tensor
        self.decoder_inputs = decoder_inputs
        self.target_weights = target_weights
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.encoder_masks = encoder_masks

        # Create the internal multi-layer cell for our RNN. Each layer must
        # get its own cell instance: reusing one instance via
        # [single_cell] * n makes the layers share weights and raises a
        # variable-reuse error in newer TF 1.x releases.
        if use_gru:
            print("using GRU CELL in decoder")

        def single_cell():
            if use_gru:
                return rnn_cell_impl.GRUCell(attn_num_hidden)
            return rnn_cell_impl.BasicLSTMCell(
                attn_num_hidden, forget_bias=0.0, state_is_tuple=False)

        if attn_num_layers > 1:
            cell = rnn_cell_impl.MultiRNNCell(
                [single_cell() for _ in range(attn_num_layers)],
                state_is_tuple=False)
        else:
            cell = single_cell()

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):

            num_hidden = attn_num_layers * attn_num_hidden
            lstm_fw_cell = rnn_cell_impl.BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)
            # Backward direction cell
            lstm_bw_cell = rnn_cell_impl.BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=False)

            pre_encoder_inputs, output_state_fw, output_state_bw = \
                tf.contrib.rnn.static_bidirectional_rnn(
                    lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                    initial_state_fw=None, initial_state_bw=None,
                    dtype=tf.float32, sequence_length=None, scope=None)

            # Zero out padded encoder positions using the precomputed masks.
            encoder_inputs = [e * f for e, f in
                              zip(pre_encoder_inputs, encoder_masks[:seq_length])]
            top_states = [array_ops.reshape(e, [-1, 1, num_hidden*2])
                    for e in encoder_inputs]
            attention_states = array_ops.concat(top_states, 1)
            initial_state = tf.concat(axis=1, values=[output_state_fw, output_state_bw])
            outputs, _, attention_weights_history = embedding_attention_decoder(
                    decoder_inputs, initial_state, attention_states, cell,
                    num_symbols=target_vocab_size, 
                    embedding_size=target_embedding_size,
                    num_heads=1,
                    output_size=target_vocab_size, 
                    output_projection=None,
                    feed_previous=do_decode,
                    initial_state_attention=False,
                    attn_num_hidden=attn_num_hidden)
            return outputs, attention_weights_history

        # Our targets are the decoder inputs shifted by one position.
        targets = [decoder_inputs[i + 1]
                   for i in range(len(decoder_inputs) - 1)]

        softmax_loss_function = None  # defaults to tf.nn.sparse_softmax_cross_entropy_with_logits

        # Training outputs and losses. In forward-only (inference) mode the
        # decoder feeds back its own predictions (do_decode=True).
        self.outputs, self.losses, self.attention_weights_histories = model_with_buckets(
                encoder_inputs_tensor, decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y, z: seq2seq_f(x, y, z, forward_only),
                softmax_loss_function=softmax_loss_function)
Code example #7
 def testIndyLSTMCell(self):
   for dtype in [dtypes.float16, dtypes.float32]:
     np_dtype = dtype.as_numpy_dtype
     with self.session(graph=ops.Graph()) as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
         x = array_ops.zeros([1, 2], dtype=dtype)
         state_0 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         state_1 = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         cell = rnn_cell_impl.MultiRNNCell(
             [contrib_rnn_cell.IndyLSTMCell(2) for _ in range(2)])
         self.assertEqual(cell.dtype, None)
         self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name)
         self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name)
         cell.get_config()  # Should not throw an error
         g, (out_state_0, out_state_1) = cell(x, (state_0, state_1))
         # Layer infers the input type.
         self.assertEqual(cell.dtype, dtype.name)
         expected_variable_names = [
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_w:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s_u:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_0/indy_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_w:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s_u:0" %
             rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
             "root/multi_rnn_cell/cell_1/indy_lstm_cell/%s:0" %
             rnn_cell_impl._BIAS_VARIABLE_NAME
         ]
         self.assertEqual(expected_variable_names,
                          [v.name for v in cell.trainable_variables])
         self.assertFalse(cell.non_trainable_variables)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_state_0, out_state_1], {
                 x.name: np.array([[1., 1.]]),
                 state_0[0].name: 0.1 * np.ones([1, 2]),
                 state_0[1].name: 0.1 * np.ones([1, 2]),
                 state_1[0].name: 0.1 * np.ones([1, 2]),
                 state_1[1].name: 0.1 * np.ones([1, 2]),
             })
         self.assertEqual(len(res), 3)
         variables = variables_lib.global_variables()
         self.assertEqual(expected_variable_names, [v.name for v in variables])
         # Only check the range of outputs as this is just a smoke test.
         self.assertAllInRange(res[0], -1.0, 1.0)
         self.assertAllInRange(res[1], -1.0, 1.0)
         self.assertAllInRange(res[2], -1.0, 1.0)
       with variable_scope.variable_scope(
           "other", initializer=init_ops.constant_initializer(0.5)):
         # Test IndyLSTMCell with input_size != num_units.
         x = array_ops.zeros([1, 3], dtype=dtype)
         state = (array_ops.zeros([1, 2], dtype=dtype),) * 2
         g, out_state = contrib_rnn_cell.IndyLSTMCell(2)(x, state)
         sess.run([variables_lib.global_variables_initializer()])
         res = sess.run(
             [g, out_state], {
                 x.name: np.array([[1., 1., 1.]], dtype=np_dtype),
                 state[0].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
                 state[1].name: 0.1 * np.ones([1, 2], dtype=np_dtype),
             })
         self.assertEqual(len(res), 2)
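
Outside the test harness, a plain usage sketch of the same cell (an assumption-laden example: TF 1.x with contrib available, using the same alias as the snippet above):

import tensorflow as tf
from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell

# IndyLSTMCell keeps its recurrent weights element-wise (diagonal), so each
# hidden unit depends only on its own previous value.
cell = contrib_rnn_cell.IndyLSTMCell(num_units=2)
inputs = tf.placeholder(tf.float32, [None, 5, 3])  # [batch, time, features]
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)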