Example #1
    def test_without_std_share_network_output_values(self, mock_normal,
                                                     output_dim, hidden_dim,
                                                     init_std):
        mock_normal.return_value = 0.5
        model = GaussianGRUModel(output_dim=output_dim,
                                 hidden_dim=hidden_dim,
                                 std_share_network=False,
                                 hidden_nonlinearity=None,
                                 recurrent_nonlinearity=None,
                                 hidden_w_init=self.default_initializer,
                                 recurrent_w_init=self.default_initializer,
                                 output_w_init=self.default_initializer,
                                 init_std=init_std)
        step_hidden_var = tf.compat.v1.placeholder(shape=(self.batch_size,
                                                          hidden_dim),
                                                   name='step_hidden',
                                                   dtype=tf.float32)
        (action_var, mean_var, step_mean_var, log_std_var, step_log_std_var,
         step_hidden, hidden_init_var,
         dist) = model.build(self.input_var, self.step_input_var,
                             step_hidden_var)

        hidden1 = hidden2 = np.full((self.batch_size, hidden_dim),
                                    hidden_init_var.eval())

        # Full-sequence pass through the unrolled network; the step loop
        # below checks the single-step path against a NumPy reference.
        mean, log_std = self.sess.run(
            [mean_var, log_std_var],
            feed_dict={self.input_var: self.obs_inputs})

        for _ in range(self.time_step):
            action, mean1, log_std1, hidden1 = self.sess.run(
                [action_var, step_mean_var, step_log_std_var, step_hidden],
                feed_dict={
                    self.step_input_var: self.obs_input,
                    step_hidden_var: hidden1
                })

            hidden2 = recurrent_step_gru(input_val=self.obs_input,
                                         num_units=hidden_dim,
                                         step_hidden=hidden2,
                                         w_x_init=0.1,
                                         w_h_init=0.1,
                                         b_init=0.,
                                         nonlinearity=None,
                                         gate_nonlinearity=None)

            # Every output weight is 0.1, so the expected mean is a constant
            # matrix product with the reference hidden state.
            output_w = np.full((np.prod(hidden2.shape[1:]), output_dim), 0.1)
            output2 = np.matmul(hidden2, output_w)
            assert np.allclose(mean1, output2)
            expected_log_std = np.full((self.batch_size, output_dim),
                                       np.log(init_std))
            assert np.allclose(log_std1, expected_log_std)
            assert np.allclose(hidden1, hidden2)

            # mock_normal returns 0.5, so the sampled action is
            # mean + 0.5 * std.
            expected_action = 0.5 * np.exp(log_std1) + mean1
            assert np.allclose(action, expected_action)
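
Both loops above compare the TensorFlow step output against recurrent_step_gru, a NumPy reference helper from the test suite that is not shown on this page. A minimal sketch of what such a helper plausibly computes, assuming the standard GRU update with every weight block filled from the constant initializers (this reconstruction is an assumption, not the suite's actual code):

    import numpy as np

    def recurrent_step_gru(input_val, num_units, step_hidden, w_x_init,
                           w_h_init, b_init, nonlinearity, gate_nonlinearity):
        """One GRU step with constant-initialized weights (NumPy sketch)."""
        def identity(x):
            return x

        act = nonlinearity if nonlinearity is not None else identity
        gate = gate_nonlinearity if gate_nonlinearity is not None else identity
        input_dim = input_val.shape[1]
        # Constant weight blocks, mirroring the constant initializers above.
        w_x = np.full((input_dim, num_units), w_x_init)
        w_h = np.full((num_units, num_units), w_h_init)
        b = np.full((num_units, ), b_init)
        z = gate(input_val @ w_x + step_hidden @ w_h + b)  # update gate
        r = gate(input_val @ w_x + step_hidden @ w_h + b)  # reset gate
        # Candidate state uses the reset-gated hidden state.
        c = act(input_val @ w_x + (r * step_hidden) @ w_h + b)
        # Interpolate between the old hidden state and the candidate.
        return z * step_hidden + (1. - z) * c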
Example #2
    def test_std_share_network_output_values(self, mock_normal, output_dim,
                                             hidden_dim):
        mock_normal.return_value = 0.5
        model = GaussianGRUModel(output_dim=output_dim,
                                 hidden_dim=hidden_dim,
                                 std_share_network=True,
                                 hidden_nonlinearity=None,
                                 recurrent_nonlinearity=None,
                                 hidden_w_init=self.default_initializer,
                                 recurrent_w_init=self.default_initializer,
                                 output_w_init=self.default_initializer)
        step_hidden_var = tf.compat.v1.placeholder(shape=(self.batch_size,
                                                          hidden_dim),
                                                   name='step_hidden',
                                                   dtype=tf.float32)
        (_, step_mean_var, step_log_std_var, step_hidden,
         hidden_init_var) = model.build(self.input_var, self.step_input_var,
                                        step_hidden_var).outputs

        hidden1 = hidden2 = np.full((self.batch_size, hidden_dim),
                                    hidden_init_var.eval())

        for _ in range(self.time_step):
            mean1, log_std1, hidden1 = self.sess.run(
                [step_mean_var, step_log_std_var, step_hidden],
                feed_dict={
                    self.step_input_var: self.obs_input,
                    step_hidden_var: hidden1
                })

            hidden2 = recurrent_step_gru(input_val=self.obs_input,
                                         num_units=hidden_dim,
                                         step_hidden=hidden2,
                                         w_x_init=0.1,
                                         w_h_init=0.1,
                                         b_init=0.,
                                         nonlinearity=None,
                                         gate_nonlinearity=None)

            # With std_share_network=True, mean and log-std come from one
            # shared output layer whose weights are all 0.1, so both heads
            # reduce to the same constant matrix product.
            output_w = np.full((np.prod(hidden2.shape[1:]), output_dim), 0.1)
            output2 = np.matmul(hidden2, output_w)
            assert np.allclose(mean1, output2)
            assert np.allclose(log_std1, output2)
            assert np.allclose(hidden1, hidden2)
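
Why log_std1 is checked against the same output2 as mean1: with std_share_network=True the model emits mean and log-std from a single shared output layer, and since the test fills every output weight with the same constant, the two halves of that head are numerically identical. A hypothetical standalone illustration (all names below are mine, not from the test suite):

    import numpy as np

    batch_size, hidden_dim, output_dim = 2, 4, 3
    hidden = np.ones((batch_size, hidden_dim))
    # Shared head of width 2 * output_dim, every weight set to 0.1.
    shared_w = np.full((hidden_dim, 2 * output_dim), 0.1)
    out = hidden @ shared_w
    mean, log_std = out[:, :output_dim], out[:, output_dim:]
    assert np.allclose(mean, log_std)  # both halves see identical weights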
Example #3
    def test_output_value_trainable_hidden_and_cell(self, time_step, input_dim,
                                                    output_dim):
        obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
        obs_input = np.full((self.batch_size, input_dim), 1.)

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None, input_dim),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, input_dim),
                                                  name='step_input')
        output_nonlinearity = tf.keras.layers.Dense(
            units=output_dim,
            activation=None,
            kernel_initializer=tf.constant_initializer(1))
        with tf.compat.v1.variable_scope('GRU'):
            self.gru = gru(all_input_var=input_var,
                           name='gru',
                           gru_cell=self.gru_cell,
                           step_input_var=step_input_var,
                           step_hidden_var=self.step_hidden_var,
                           hidden_state_init_trainable=True,
                           output_nonlinearity_layer=output_nonlinearity)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Compute the step output by running the GRU cell for one step
        outputs_t, output_t, h_t, hidden_init = self.gru
        hidden = np.full((self.batch_size, self.hidden_dim),
                         hidden_init.eval())

        output, hidden = self.sess.run([output_t, h_t],
                                       feed_dict={
                                           step_input_var: obs_input,
                                           self.step_hidden_var: hidden,
                                       })  # noqa: E126
        with tf.compat.v1.variable_scope('GRU/gru', reuse=True):
            hidden_init_var = tf.compat.v1.get_variable(name='initial_hidden')
            assert hidden_init_var in tf.compat.v1.trainable_variables()

        full_output1 = self.sess.run(outputs_t,
                                     feed_dict={input_var: obs_inputs})

        hidden2 = np.full((self.batch_size, self.hidden_dim),
                          hidden_init.eval())
        def sigmoid(x):
            return 1. / (1. + np.exp(-x))

        # Unroll the NumPy reference step over time and stack the hidden
        # states into shape (batch, time, hidden_dim).
        hidden_states = []
        for i in range(time_step):
            hidden2 = recurrent_step_gru(input_val=obs_inputs[:, i, :],
                                         num_units=self.hidden_dim,
                                         step_hidden=hidden2,
                                         w_x_init=1.,
                                         w_h_init=1.,
                                         b_init=0.,
                                         nonlinearity=np.tanh,
                                         gate_nonlinearity=sigmoid)
            hidden_states.append(hidden2)
        stack_hidden = np.stack(hidden_states, axis=1)
        # Constant output weights matching the Dense layer's constant kernel.
        output_w = np.full((np.prod(hidden2.shape[1:]), output_dim), 1.)
        full_output2 = np.matmul(stack_hidden, output_w)
        assert np.allclose(full_output1, full_output2)
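
The unroll-and-stack loop above is a common pattern for checking a full TensorFlow unroll against a stepwise NumPy reference. A hypothetical helper that factors it out (unroll_gru is an illustrative name, not part of the test suite):

    import numpy as np

    def unroll_gru(inputs, num_units, hidden0, step_fn, **step_kwargs):
        """Unroll a single-step GRU reference over the time axis.

        step_fn is assumed to accept the same keyword arguments as the
        recurrent_step_gru helper used in these tests. Returns stacked
        hidden states of shape (batch, time, num_units).
        """
        hidden = hidden0
        states = []
        for t in range(inputs.shape[1]):
            hidden = step_fn(input_val=inputs[:, t, :], num_units=num_units,
                             step_hidden=hidden, **step_kwargs)
            states.append(hidden)
        return np.stack(states, axis=1)

With such a helper, the loop in this example collapses to a single call followed by the matmul against the constant output weights.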