Example No. 1
 def testGRUCell(self):
     with self.test_session() as sess:
         with variable_scope.variable_scope(
                 "root", initializer=init_ops.constant_initializer(0.5)):
             x = array_ops.zeros([1, 2])
             m = array_ops.zeros([1, 2])
             g, _ = rnn_cell_impl.GRUCell(2)(x, m)
             sess.run([variables_lib.global_variables_initializer()])
             res = sess.run([g], {
                 x.name: np.array([[1., 1.]]),
                 m.name: np.array([[0.1, 0.1]])
             })
             # Smoke test
             self.assertAllClose(res[0], [[0.175991, 0.175991]])
         with variable_scope.variable_scope(
                 "other", initializer=init_ops.constant_initializer(0.5)):
             # Test GRUCell with input_size != num_units.
             x = array_ops.zeros([1, 3])
             m = array_ops.zeros([1, 2])
             g, _ = rnn_cell_impl.GRUCell(2)(x, m)
             sess.run([variables_lib.global_variables_initializer()])
             res = sess.run(
                 [g], {
                     x.name: np.array([[1., 1., 1.]]),
                     m.name: np.array([[0.1, 0.1]])
                 })
             # Smoke test
             self.assertAllClose(res[0], [[0.156736, 0.156736]])
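Both scopes above drive the same 2-unit GRUCell, first with input width 2 and then with width 3. A minimal standalone sketch of the same pattern, assuming TensorFlow 2.x with the tf.compat.v1 graph-mode API in place of the internal aliases (rnn_cell_impl, array_ops, variables_lib) the tests import:

    import numpy as np
    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    x = tf.placeholder(tf.float32, [1, 3])  # input width need not equal num_units
    m = tf.placeholder(tf.float32, [1, 2])  # state width must equal num_units
    g, new_m = tf.nn.rnn_cell.GRUCell(2)(x, m)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out, state = sess.run(
            [g, new_m], {x: np.ones([1, 3]), m: 0.1 * np.ones([1, 2])})
        # For a GRU the emitted output and the new state are the same values.
        assert np.allclose(out, state)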
Example No. 2
    def testMultiRNNCellWithStateTuple(self):
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 2])
                m_bad = array_ops.zeros([1, 4])
                m_good = (array_ops.zeros([1, 2]), array_ops.zeros([1, 2]))

                # Test incorrectness of state
                with self.assertRaisesRegexp(ValueError,
                                             "Expected state .* a tuple"):
                    rnn_cell_impl.MultiRNNCell(
                        [rnn_cell_impl.GRUCell(2) for _ in range(2)],
                        state_is_tuple=True)(x, m_bad)

                _, ml = rnn_cell_impl.MultiRNNCell(
                    [rnn_cell_impl.GRUCell(2) for _ in range(2)],
                    state_is_tuple=True)(x, m_good)

                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run(
                    ml, {
                        x.name: np.array([[1., 1.]]),
                        m_good[0].name: np.array([[0.1, 0.1]]),
                        m_good[1].name: np.array([[0.1, 0.1]])
                    })

                # The numbers in results were not calculated, this is just a
                # smoke test.  However, these numbers should match those of
                # the test testMultiRNNCell.
                self.assertAllClose(res[0], [[0.175991, 0.175991]])
                self.assertAllClose(res[1], [[0.13248, 0.13248]])
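The test constructs m_good by hand; in ordinary use the cell can produce a correctly shaped tuple state itself via zero_state. A short sketch under the same tf.compat.v1 assumptions as above:

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.GRUCell(2) for _ in range(2)], state_is_tuple=True)
    x = tf.zeros([1, 2])
    init_state = cell.zero_state(batch_size=1, dtype=tf.float32)
    output, new_state = cell(x, init_state)
    print(len(new_state))  # 2: one [1, 2] state tensor per layer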
Example No. 3
    def testResidualWrapperWithSlice(self):
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 5])
                m = array_ops.zeros([1, 3])
                base_cell = rnn_cell_impl.GRUCell(3)
                g, m_new = base_cell(x, m)
                variable_scope.get_variable_scope().reuse_variables()

                def residual_with_slice_fn(inp, out):
                    inp_sliced = array_ops.slice(inp, [0, 0], [-1, 3])
                    return inp_sliced + out

                g_res, m_new_res = rnn_cell_impl.ResidualWrapper(
                    base_cell, residual_with_slice_fn)(x, m)
                sess.run([variables_lib.global_variables_initializer()])
                res_g, res_g_res, res_m_new, res_m_new_res = sess.run(
                    [g, g_res, m_new, m_new_res], {
                        x: np.array([[1., 1., 1., 1., 1.]]),
                        m: np.array([[0.1, 0.1, 0.1]])
                    })
                # Residual connections
                self.assertAllClose(res_g_res, res_g + [1., 1., 1.])
                # States are left untouched
                self.assertAllClose(res_m_new, res_m_new_res)
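The custom residual function is needed because ResidualWrapper's default connection adds inputs and outputs elementwise, so a [1, 5] input cannot be added to a [1, 3] GRU output. A plain NumPy illustration of what residual_with_slice_fn computes:

    import numpy as np

    inp = np.ones([1, 5], np.float32)   # wrapper input, width 5
    out = np.zeros([1, 3], np.float32)  # GRU output, width 3
    res = inp[:, :3] + out              # slice the input to width 3, then add
    assert res.shape == (1, 3)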
Example No. 4
    def testResidualWrapperWithSlice(self):
        wrapper_type = rnn_cell_wrapper_v2.ResidualWrapper
        x = ops.convert_to_tensor_v2_with_dispatch(
            np.array([[1., 1., 1., 1., 1.]]), dtype="float32")
        m = ops.convert_to_tensor_v2_with_dispatch(
            np.array([[0.1, 0.1, 0.1]]), dtype="float32")
        base_cell = rnn_cell_impl.GRUCell(
            3,
            kernel_initializer=init_ops.constant_initializer(0.5),
            bias_initializer=init_ops.constant_initializer(0.5))
        g, m_new = base_cell(x, m)

        def residual_with_slice_fn(inp, out):
            inp_sliced = array_ops.slice(inp, [0, 0], [-1, 3])
            return inp_sliced + out

        g_res, m_new_res = wrapper_type(base_cell, residual_with_slice_fn)(x, m)
        self.evaluate([variables_lib.global_variables_initializer()])
        res_g, res_g_res, res_m_new, res_m_new_res = self.evaluate(
            [g, g_res, m_new, m_new_res])
        # Residual connections
        self.assertAllClose(res_g_res, res_g + [1., 1., 1.])
        # States are left untouched
        self.assertAllClose(res_m_new, res_m_new_res)
Example No. 5
    def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self):
        if not test.is_gpu_available():
            # Can't perform this test w/o a GPU
            return

        gpu_dev = test.gpu_device_name()
        with self.test_session(use_gpu=True) as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 1, 3])
                cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3),
                                                   gpu_dev)
                with ops.device("/cpu:0"):
                    outputs, _ = rnn.dynamic_rnn(cell=cell,
                                                 inputs=x,
                                                 dtype=dtypes.float32)
                run_metadata = config_pb2.RunMetadata()
                opts = config_pb2.RunOptions(
                    trace_level=config_pb2.RunOptions.FULL_TRACE)

                sess.run([variables_lib.global_variables_initializer()])
                _ = sess.run(outputs, options=opts, run_metadata=run_metadata)

            cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata)
            self.assertFalse(
                [s for s in cpu_stats if "gru_cell" in s.node_name])
            self.assertTrue(
                [s for s in gpu_stats if "gru_cell" in s.node_name])
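The test relies on a helper, self._retrieve_cpu_gpu_stats, that is not shown in this excerpt. A plausible sketch of such a helper (an assumption, not the original implementation): it walks run_metadata.step_stats.dev_stats and splits the per-node execution stats by device string:

    def _retrieve_cpu_gpu_stats(self, run_metadata):
        # Hypothetical reconstruction of the missing helper.
        cpu_stats = []
        gpu_stats = []
        for dev_stats in run_metadata.step_stats.dev_stats:
            device = dev_stats.device.lower()
            for node_stats in dev_stats.node_stats:
                if "cpu:0" in device:
                    cpu_stats.append(node_stats)
                elif "gpu:0" in device:
                    gpu_stats.append(node_stats)
        return cpu_stats, gpu_stats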
Example No. 6
    def testResidualWrapper(self):
        with self.test_session() as sess:
            with variable_scope.variable_scope(
                    "root", initializer=init_ops.constant_initializer(0.5)):
                x = array_ops.zeros([1, 3])
                m = array_ops.zeros([1, 3])
                base_cell = rnn_cell_impl.GRUCell(3)
                g, m_new = base_cell(x, m)
                variable_scope.get_variable_scope().reuse_variables()
                wrapper_object = rnn_cell_impl.ResidualWrapper(base_cell)
                (name, dep), = wrapper_object._checkpoint_dependencies
                wrapper_object.get_config()  # Should not throw an error
                self.assertIs(dep, base_cell)
                self.assertEqual("cell", name)

                g_res, m_new_res = wrapper_object(x, m)
                sess.run([variables_lib.global_variables_initializer()])
                res = sess.run([g, g_res, m_new, m_new_res], {
                    x: np.array([[1., 1., 1.]]),
                    m: np.array([[0.1, 0.1, 0.1]])
                })
                # Residual connections
                self.assertAllClose(res[1], res[0] + [1., 1., 1.])
                # States are left untouched
                self.assertAllClose(res[2], res[3])
Example No. 7
  def testRNNCellSerialization(self):
    for cell in [
        rnn_cell_impl.LSTMCell(32, use_peepholes=True, cell_clip=True),
        rnn_cell_impl.BasicLSTMCell(32, dtype=dtypes.float32),
        rnn_cell_impl.BasicRNNCell(32, activation="relu", dtype=dtypes.float32),
        rnn_cell_impl.GRUCell(32, dtype=dtypes.float32)
    ]:
      with self.cached_session():
        x = keras.Input((None, 5))
        layer = keras.layers.RNN(cell)
        y = layer(x)
        model = keras.models.Model(x, y)
        model.compile(optimizer="rmsprop", loss="mse")

        # Test basic case serialization.
        x_np = np.random.random((6, 5, 5))
        y_np = model.predict(x_np)
        weights = model.get_weights()
        config = layer.get_config()
        # custom_objects is important here since the rnn_cell_impl classes
        # are not visible as Keras layers, and their names conflict with
        # keras.LSTMCell and keras.GRUCell.
        layer = keras.layers.RNN.from_config(
            config,
            custom_objects={
                "BasicRNNCell": rnn_cell_impl.BasicRNNCell,
                "GRUCell": rnn_cell_impl.GRUCell,
                "LSTMCell": rnn_cell_impl.LSTMCell,
                "BasicLSTMCell": rnn_cell_impl.BasicLSTMCell
            })
        y = layer(x)
        model = keras.models.Model(x, y)
        model.set_weights(weights)
        y_np_2 = model.predict(x_np)
        self.assertAllClose(y_np, y_np_2, atol=1e-4)
Example No. 8
 def testDeviceWrapper(self):
   with variable_scope.variable_scope(
       "root", initializer=init_ops.constant_initializer(0.5)):
     x = array_ops.zeros([1, 3])
     m = array_ops.zeros([1, 3])
     # "/cpu:14159" is a deliberately bogus device: the test only checks the
     # device string recorded on the op and never runs it.
     cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), "/cpu:14159")
     outputs, _ = cell(x, m)
     self.assertTrue("cpu:14159" in outputs.device.lower())
Example No. 9
  def testDeviceWrapper(self):
    wrapper_type = rnn_cell_wrapper_v2.DeviceWrapper
    x = array_ops.zeros([1, 3])
    m = array_ops.zeros([1, 3])
    cell = rnn_cell_impl.GRUCell(3)
    wrapped_cell = wrapper_type(cell, "/cpu:0")
    (name, dep), = wrapped_cell._checkpoint_dependencies
    wrapped_cell.get_config()  # Should not throw an error
    self.assertIs(dep, cell)
    self.assertEqual("cell", name)

    outputs, _ = wrapped_cell(x, m)
    self.assertIn("cpu:0", outputs.device.lower())
Example No. 10
    def testDeviceWrapper(self):
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            x = array_ops.zeros([1, 3])
            m = array_ops.zeros([1, 3])
            wrapped = rnn_cell_impl.GRUCell(3)
            cell = rnn_cell_impl.DeviceWrapper(wrapped, "/cpu:14159")
            (name, dep), = cell._checkpoint_dependencies
            self.assertIs(dep, wrapped)
            self.assertEqual("cell", name)

            outputs, _ = cell(x, m)
            self.assertTrue("cpu:14159" in outputs.device.lower())
Example No. 11
 def testOutputProjectionWrapper(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 3])
       m = array_ops.zeros([1, 3])
       cell = contrib_rnn.OutputProjectionWrapper(rnn_cell_impl.GRUCell(3), 2)
       g, new_m = cell(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run([g, new_m], {
           x.name: np.array([[1., 1., 1.]]),
           m.name: np.array([[0.1, 0.1, 0.1]])
       })
       self.assertEqual(res[1].shape, (1, 3))
       # The numbers in results were not calculated, this is just a smoke test.
       self.assertAllClose(res[0], [[0.231907, 0.231907]])
Example No. 12
    def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
        if cell_type == "gru":
            cell = rnn_cell.GRUCell(cell_size)
        else:
            cell = rnn_cell.LSTMCell(cell_size,
                                     use_peepholes=False,
                                     forget_bias=1.0)

        if keep_prob < 1.0:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

        if num_layer > 1:
            # NOTE: [cell] * num_layer stacks the SAME cell object (and thus
            # shared weights) in every layer; see the fix in Example No. 16.
            cell = rnn_cell.MultiRNNCell([cell] * num_layer,
                                         state_is_tuple=True)

        return cell
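A short usage sketch with hypothetical parameter values, assuming rnn_cell is the tf.compat.v1.nn.rnn_cell module the function above expects:

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()
    rnn_cell = tf.nn.rnn_cell  # alias assumed by get_rnncell above

    # 2-layer GRU stack with dropout on the outputs; note the shared-weights
    # caveat flagged in the function body.
    cell = get_rnncell("gru", cell_size=128, keep_prob=0.8, num_layer=2)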
Example No. 13
 def testMultiRNNCell(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 2])
       m = array_ops.zeros([1, 4])
       _, ml = rnn_cell_impl.MultiRNNCell(
           [rnn_cell_impl.GRUCell(2)
            for _ in range(2)], state_is_tuple=False)(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run(ml, {
           x.name: np.array([[1., 1.]]),
           m.name: np.array([[0.1, 0.1, 0.1, 0.1]])
       })
       # The numbers in results were not calculated, this is just a smoke test.
       self.assertAllClose(res, [[0.175991, 0.175991, 0.13248, 0.13248]])
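With state_is_tuple=False the per-layer states are concatenated along the feature axis, which is why m above is shaped [1, 4] for two 2-unit GRU layers. A quick check under the same tf.compat.v1 assumptions as earlier:

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.GRUCell(2) for _ in range(2)], state_is_tuple=False)
    print(cell.state_size)  # 4 == 2 + 2, the concatenated state width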
Example No. 14
 def testEmbeddingWrapper(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 1], dtype=dtypes.int32)
       m = array_ops.zeros([1, 2])
       embedding_cell = contrib_rnn.EmbeddingWrapper(
           rnn_cell_impl.GRUCell(2), embedding_classes=3, embedding_size=2)
       self.assertEqual(embedding_cell.output_size, 2)
       g, new_m = embedding_cell(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run(
           [g, new_m],
           {x.name: np.array([[1]]),
            m.name: np.array([[0.1, 0.1]])})
       self.assertEqual(res[1].shape, (1, 2))
       # The numbers in results were not calculated, this is just a smoke test.
       self.assertAllClose(res[0], [[0.17139, 0.17139]])
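Before invoking the inner cell, EmbeddingWrapper looks the integer id up in an [embedding_classes, embedding_size] matrix and feeds the resulting row to the GRUCell. A NumPy illustration of that lookup, using the constant initializer from the scope above:

    import numpy as np

    embedding = np.full([3, 2], 0.5, np.float32)  # constant_initializer(0.5)
    ids = np.array([[1]])                         # the int32 input x
    embedded = embedding[ids[:, 0]]               # shape [1, 2], the GRUCell input
    assert embedded.shape == (1, 2)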
Example No. 15
 def testResidualWrapper(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 3])
       m = array_ops.zeros([1, 3])
       base_cell = rnn_cell_impl.GRUCell(3)
       g, m_new = base_cell(x, m)
       variable_scope.get_variable_scope().reuse_variables()
       g_res, m_new_res = rnn_cell_impl.ResidualWrapper(base_cell)(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run([g, g_res, m_new, m_new_res], {
           x: np.array([[1., 1., 1.]]),
           m: np.array([[0.1, 0.1, 0.1]])
       })
       # Residual connections
       self.assertAllClose(res[1], res[0] + [1., 1., 1.])
       # States are left untouched
       self.assertAllClose(res[2], res[3])
Example No. 16
    def get_rnncell(cell_type, cell_size, keep_prob, num_layer):
        # thanks for this solution from @dimeldo
        cells = []
        for _ in range(num_layer):
            if cell_type == "gru":
                cell = rnn_cell.GRUCell(cell_size)
            else:
                cell = rnn_cell.LSTMCell(cell_size, use_peepholes=False, forget_bias=1.0)

            if keep_prob < 1.0:
                cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

            cells.append(cell)

        if num_layer > 1:
            cell = rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
        else:
            cell = cells[0]

        return cell
Example No. 17
  def testResidualWrapper(self):
    wrapper_type = rnn_cell_wrapper_v2.ResidualWrapper
    x = ops.convert_to_tensor(np.array([[1., 1., 1.]]))
    m = ops.convert_to_tensor(np.array([[0.1, 0.1, 0.1]]))
    base_cell = rnn_cell_impl.GRUCell(
        3, kernel_initializer=init_ops.constant_initializer(0.5),
        bias_initializer=init_ops.constant_initializer(0.5))
    g, m_new = base_cell(x, m)
    wrapper_object = wrapper_type(base_cell)
    (name, dep), = wrapper_object._checkpoint_dependencies
    wrapper_object.get_config()  # Should not throw an error
    self.assertIs(dep, base_cell)
    self.assertEqual("cell", name)

    g_res, m_new_res = wrapper_object(x, m)
    self.evaluate([variables_lib.global_variables_initializer()])
    res = self.evaluate([g, g_res, m_new, m_new_res])
    # Residual connections
    self.assertAllClose(res[1], res[0] + [1., 1., 1.])
    # States are left untouched
    self.assertAllClose(res[2], res[3])
Example No. 18
    def __init__(self, encoder_masks, encoder_inputs_tensor,
                 decoder_inputs,
                 target_weights,
                 target_vocab_size,
                 buckets,
                 target_embedding_size,
                 attn_num_layers,
                 attn_num_hidden,
                 forward_only,
                 use_gru):
        """Create the model.

        Args:
          source_vocab_size: size of the source vocabulary.
          target_vocab_size: size of the target vocabulary.
          buckets: a list of pairs (I, O), where I specifies maximum input length
            that will be processed in that bucket, and O specifies maximum output
            length. Training instances that have inputs longer than I or outputs
            longer than O will be pushed to the next bucket and padded accordingly.
            We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
          size: number of units in each layer of the model.
          num_layers: number of layers in the model.
          max_gradient_norm: gradients will be clipped to maximally this norm.
          learning_rate: learning rate to start with.
          learning_rate_decay_factor: decay learning rate by this much when needed.
          use_lstm: if true, we use LSTM cells instead of GRU cells.
          num_samples: number of samples for sampled softmax.
          forward_only: if set, we do not construct the backward pass in the model.
        """
        self.encoder_inputs_tensor = encoder_inputs_tensor
        self.decoder_inputs = decoder_inputs
        self.target_weights = target_weights
        self.target_vocab_size = target_vocab_size
        self.buckets = buckets
        self.encoder_masks = encoder_masks

        # Create the internal multi-layer cell for our RNN.
        single_cell = rnn_cell_impl.BasicLSTMCell(
            attn_num_hidden, forget_bias=0.0, state_is_tuple=False)
        if use_gru:
            print("using GRU CELL in decoder")
            single_cell = rnn_cell_impl.GRUCell(attn_num_hidden)
        cell = single_cell

        if attn_num_layers > 1:
            # Note: [single_cell] * attn_num_layers reuses the same cell
            # object (and hence the same weights) in every layer.
            cell = rnn_cell_impl.MultiRNNCell(
                [single_cell] * attn_num_layers, state_is_tuple=False)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(lstm_inputs, decoder_inputs, seq_length, do_decode):
            num_hidden = attn_num_layers * attn_num_hidden
            # Forward direction cell
            lstm_fw_cell = rnn_cell_impl.BasicLSTMCell(
                num_hidden, forget_bias=0.0, state_is_tuple=False)
            # Backward direction cell
            lstm_bw_cell = rnn_cell_impl.BasicLSTMCell(
                num_hidden, forget_bias=0.0, state_is_tuple=False)

            pre_encoder_inputs, output_state_fw, output_state_bw = (
                tf.contrib.rnn.static_bidirectional_rnn(
                    lstm_fw_cell, lstm_bw_cell, lstm_inputs,
                    initial_state_fw=None, initial_state_bw=None,
                    dtype=tf.float32, sequence_length=None, scope=None))

            encoder_inputs = [
                e * f for e, f in zip(pre_encoder_inputs,
                                      encoder_masks[:seq_length])
            ]
            top_states = [
                array_ops.reshape(e, [-1, 1, num_hidden * 2])
                for e in encoder_inputs
            ]
            attention_states = array_ops.concat(top_states, 1)
            initial_state = tf.concat(
                axis=1, values=[output_state_fw, output_state_bw])
            outputs, _, attention_weights_history = embedding_attention_decoder(
                decoder_inputs, initial_state, attention_states, cell,
                num_symbols=target_vocab_size,
                embedding_size=target_embedding_size,
                num_heads=1,
                output_size=target_vocab_size,
                output_projection=None,
                feed_previous=do_decode,
                initial_state_attention=False,
                attn_num_hidden=attn_num_hidden)
            return outputs, attention_weights_history

        # Our targets are decoder inputs shifted by one.
        targets = [decoder_inputs[i + 1]
                   for i in range(len(decoder_inputs) - 1)]

        # Defaults to tf.nn.sparse_softmax_cross_entropy_with_logits.
        softmax_loss_function = None

        # Training outputs and losses.  In forward-only mode the decoder
        # feeds its previous prediction back in (do_decode=True).
        self.outputs, self.losses, self.attention_weights_histories = (
            model_with_buckets(
                encoder_inputs_tensor, decoder_inputs, targets,
                self.target_weights, buckets,
                lambda x, y, z: seq2seq_f(x, y, z, forward_only),
                softmax_loss_function=softmax_loss_function))
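The docstring above explains how buckets bound input and output lengths. A tiny illustration of that selection rule (pick_bucket is a hypothetical helper, not part of the model): choose the first bucket whose (I, O) bounds fit the example.

    def pick_bucket(buckets, input_len, output_len):
        # First bucket that can hold both the input and the output.
        for bucket_id, (max_in, max_out) in enumerate(buckets):
            if input_len <= max_in and output_len <= max_out:
                return bucket_id
        raise ValueError("example does not fit in any bucket")

    assert pick_bucket([(2, 4), (8, 16)], input_len=1, output_len=3) == 0
    assert pick_bucket([(2, 4), (8, 16)], input_len=5, output_len=10) == 1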