Code Example #1
    def test_clone(self, obs_dim, action_dim, hidden_sizes):
        env = GarageEnv(
            DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        qf = DiscreteMLPDuelingQFunction(env_spec=env.spec,
                                         hidden_sizes=hidden_sizes)
        # clone() builds a second Q-function under the given name with
        # the same constructor hyperparameters.
        qf_clone = qf.clone('another_qf')
        assert qf_clone._hidden_sizes == qf._hidden_sizes
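These snippets are individual test methods and omit their surrounding module. A minimal sketch of the scaffolding they appear to assume, following garage's test layout (the fixture import paths and the TfGraphTestCase base class, which provides self.sess, are assumptions, not copied from the source), could look like this. Note that this first example uses the older GarageEnv wrapper, while the remaining examples use GymEnv:

    import pickle

    import numpy as np
    import pytest
    import tensorflow as tf

    from garage.envs import GymEnv
    from garage.tf.q_functions import DiscreteMLPDuelingQFunction

    from tests.fixtures import TfGraphTestCase  # assumed path
    from tests.fixtures.envs.dummy import DummyDiscreteEnv  # assumed path


    class TestDiscreteMLPDuelingQFunction(TfGraphTestCase):
        # TfGraphTestCase (assumed) opens a fresh tf.Graph and a
        # tf.compat.v1.Session, exposed as self.sess, around each test.
        ...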
Code Example #2
    def test_build(self, obs_dim, action_dim):
        env = GymEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        qf = DiscreteMLPDuelingQFunction(env_spec=env.spec)
        env.reset()
        obs = env.step(1).observation

        output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})

        # Rebuild the same network, with its parameters shared, over a
        # new input placeholder under a fresh name.
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) + obs_dim)
        q_vals = qf.build(input_var, 'another')
        output2 = self.sess.run(q_vals, feed_dict={input_var: [obs]})

        assert np.array_equal(output1, output2)
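qf.build() wires the already-built network, with its parameters shared, to a new input placeholder under a fresh name, so both forward passes must return identical Q-values. Tests like these are typically driven by pytest parametrization; a hypothetical decorator (the dimension values below are illustrative, not taken from the source):

    @pytest.mark.parametrize('obs_dim, action_dim', [
        ((1, ), 1),
        ((2, ), 2),
    ])
    def test_build(self, obs_dim, action_dim):
        ...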
Code Example #3
    def test_output_shape(self, obs_dim, action_dim):
        env = GymEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        qf = DiscreteMLPDuelingQFunction(env_spec=env.spec)
        env.reset()
        obs = env.step(1).observation

        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
        assert outputs.shape == (1, action_dim)
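The Q-network emits one row per observation in the batch and one Q-value per discrete action. On top of that output, a greedy policy would take the argmax; a hypothetical usage sketch, not part of the test file:

    # Hypothetical: greedy action selection from the Q-values.
    q = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
    greedy_action = int(np.argmax(q, axis=1)[0])
    assert 0 <= greedy_action < action_dim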
Code Example #4
    def test_get_action(self, obs_dim, action_dim, hidden_sizes):
        env = GymEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        qf = DiscreteMLPDuelingQFunction(env_spec=env.spec,
                                         hidden_sizes=hidden_sizes,
                                         hidden_w_init=tf.ones_initializer(),
                                         output_w_init=tf.ones_initializer())
        obs = np.full(obs_dim, 1)

        # With all-ones weights the advantage stream is constant across
        # actions, so the dueling head reduces to the value stream.
        expected_output = np.full(action_dim,
                                  obs_dim[-1] * np.prod(hidden_sizes))

        outputs = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
        assert np.array_equal(outputs[0], expected_output)

        outputs = self.sess.run(qf.q_vals,
                                feed_dict={qf.input: [obs, obs, obs]})
        for output in outputs:
            assert np.array_equal(output, expected_output)
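The expected value follows from the all-ones initialization: with all-ones weights, zero biases, and an all-ones observation, every ReLU unit simply sums its (positive) inputs, so each hidden layer multiplies the activations by the previous layer's width. The expected_output above is consistent with the standard dueling aggregation (Wang et al., 2016), which this sketch assumes:

    # Sketch of the arithmetic, assuming the standard dueling head:
    #
    #     Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a')
    #
    # After the hidden stack, every unit equals
    # obs_dim[-1] * prod(hidden_sizes[:-1]); the linear value head sums
    # hidden_sizes[-1] such units, so V(s) = obs_dim[-1] *
    # prod(hidden_sizes). All advantages A(s, a) are identical, so the
    # A terms cancel and Q(s, a) = V(s) for every action.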
Code Example #5
    def test_is_pickleable(self, obs_dim, action_dim):
        env = GymEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        qf = DiscreteMLPDuelingQFunction(env_spec=env.spec)
        env.reset()
        obs = env.step(1).observation

        # Look up one of the network's variables and overwrite it, so
        # the pickled parameters differ from freshly initialized ones.
        with tf.compat.v1.variable_scope('DiscreteMLPDuelingQFunction',
                                         reuse=True):
            bias = tf.compat.v1.get_variable('state_value/hidden_0/bias')
        # Set the bias to all ones.
        bias.load(tf.ones_like(bias).eval())

        output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})

        h_data = pickle.dumps(qf)
        # Unpickle in a brand-new graph and session; the restored
        # parameters must reproduce the same outputs.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            qf_pickled = pickle.loads(h_data)
            output2 = sess.run(qf_pickled.q_vals,
                               feed_dict={qf_pickled.input: [obs]})

        assert np.array_equal(output1, output2)
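The round trip works only if the Q-function serializes its constructor arguments together with the current parameter values, so that unpickling in a fresh graph rebuilds the network and restores the perturbed bias. A minimal sketch of that pattern, with hypothetical method and attribute names rather than garage's actual implementation:

    class PicklableQFunction:
        """Hypothetical sketch of a pickle-friendly TF1 model."""

        def __getstate__(self):
            state = self.__dict__.copy()
            # Store plain parameter values instead of live TF objects.
            state['_param_values'] = self.get_param_values()
            del state['_q_vals']  # hypothetical unpicklable tensor
            return state

        def __setstate__(self, state):
            param_values = state.pop('_param_values')
            self.__dict__.update(state)
            self._initialize()  # hypothetical: rebuild the TF graph
            self.set_param_values(param_values)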