Esempio n. 1
0
 def test_compute_returns_shape_does_not_match_error(self):
     """compute_returns must raise AssertionError on inconsistent shapes.

     Each call passes (bootstrap value, rewards, terminals, 0.9) with
     exactly one shape disagreement between the first three arguments.
     """
     # 1-D inputs: rewards (5,) shorter than terminals (6,).
     with pytest.raises(AssertionError):
         compute_returns(0.0, np.arange(5), np.arange(6), 0.9)
     # 1-D inputs: rewards (6,) longer than terminals (5,).
     with pytest.raises(AssertionError):
         compute_returns(0.0, np.arange(6), np.arange(5), 0.9)
     # 2-D inputs: rewards (6, 5) has one more row than terminals (5, 5).
     with pytest.raises(AssertionError):
         compute_returns(np.arange(5), np.ones((6, 5)), np.ones((5, 5)),
                         0.9)
     # 2-D inputs: terminals (6, 5) has one more row than rewards (5, 5).
     with pytest.raises(AssertionError):
         compute_returns(np.arange(5), np.ones((5, 5)), np.ones((6, 5)),
                         0.9)
     # 2-D inputs agree, but the bootstrap vector has 6 entries while
     # rewards/terminals are (5, 5).
     with pytest.raises(AssertionError):
         compute_returns(np.arange(6), np.ones((5, 5)), np.ones((5, 5)),
                         0.9)
Esempio n. 2
0
    def fetch(self, gamma, lam):
        """Build a batch dictionary from the stored steps.

        Every stored step except the last becomes a batch entry. The value
        stored at the last index is handed to ``compute_returns`` and
        ``compute_gae`` as the bootstrap value. Rewards and terminals are
        read one index later than the observations, actions, values and
        log-probabilities they are paired with.

        Args:
            gamma: forwarded to ``compute_returns`` and ``compute_gae``
                (presumably the discount factor -- confirm in those
                helpers).
            lam: forwarded to ``compute_gae`` (presumably the GAE lambda
                -- confirm in that helper).

        Returns:
            dict with keys ``obs_t``, ``actions_t``, ``log_probs_t``,
            ``returns_t``, ``advantages_t`` and ``values_t``. The
            advantages are shifted by their mean and divided by their
            standard deviation plus 1e-8.

        Raises:
            AssertionError: if ``self.size()`` is not greater than 1.
        """
        assert self.size() > 1

        # The last stored index is only used for bootstrapping.
        n_steps = self.size() - 1
        current = slice(None, n_steps)
        shifted = slice(1, n_steps + 1)

        observations = np.array(self.obs_t)[current]
        actions = np.array(self.actions_t)[current]
        next_rewards = np.array(self.rewards_t)[shifted]
        next_terminals = np.array(self.terminals_t)[shifted]
        values = np.array(self.values_t)[current]
        log_probs = np.array(self.log_probs_t)[current]
        last_value = self.values_t[n_steps]

        returns = compute_returns(
            last_value, next_rewards, next_terminals, gamma)
        raw_advantages = compute_gae(
            last_value, next_rewards, values, next_terminals, gamma, lam)

        # Standardize the advantages; the epsilon keeps the division
        # finite when they are all equal.
        centered = raw_advantages - np.mean(raw_advantages)
        spread = np.std(raw_advantages) + 1e-8
        advantages = centered / spread

        return {
            'obs_t': observations,
            'actions_t': actions,
            'log_probs_t': log_probs,
            'returns_t': returns,
            'advantages_t': advantages,
            'values_t': values
        }
Esempio n. 3
0
 def test_compute_returns_one_d_array(self):
     """Check compute_returns on a single 1-D trajectory of three steps.

     The expected values satisfy, working backwards from the bootstrap
     value, ``answer[t] = rewards[t] + 0.9 * (1 - terminals[t]) * next``:
     3.9 = 3 + 0.9 * 1.0, 2.0 = 2 + 0 (terminal step), 2.8 = 1 + 0.9 * 2.0.
     """
     bootstrap_value = 1.0
     rewards = np.array([1.0, 2.0, 3.0])
     terminals = np.array([0.0, 1.0, 0.0])
     answer = np.array([2.8, 2.0, 3.9])
     returns = compute_returns(bootstrap_value, rewards, terminals, 0.9)
     # Compare with a tolerance: the expected values are decimal literals
     # and bit-exact float equality would make the test depend on the
     # precise order of floating-point operations. assert_allclose also
     # checks the shape and reports the mismatching elements on failure.
     np.testing.assert_allclose(returns, answer)
Esempio n. 4
0
 def test_compute_returns_two_d_array(self):
     """Check compute_returns on 2-D inputs with a per-column bootstrap.

     Rewards and terminals have shape (2, 3) and the bootstrap value has
     shape (3,). The expected values satisfy, per column and working
     backwards from the bootstrap value,
     ``answer[t] = rewards[t] + 0.9 * (1 - terminals[t]) * next``:
     second row 2.9 = 2 + 0.9 * 1.0, 3.0 = 3 + 0 (terminal),
     4.0 = 4 + 0.9 * 0.0; first row 3.61 = 1 + 0.9 * 2.9,
     4.7 = 2 + 0.9 * 3.0, 3.0 = 3 + 0 (terminal).
     """
     bootstrap_value = np.array([1.0, 2.0, 0.0])
     rewards = np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
     terminals = np.array([[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]])
     answer = np.array([[3.61, 4.7, 3.0], [2.9, 3.0, 4.0]])
     returns = compute_returns(bootstrap_value, rewards, terminals, 0.9)
     # Compare with a tolerance instead of bit-exact float equality; the
     # helper also checks the shape and reports the differing elements.
     np.testing.assert_allclose(returns, answer)
Esempio n. 5
0
 def test_compute_returns_not_ndarray_error(self):
     """compute_returns must raise AssertionError on non-ndarray inputs.

     Each call passes (bootstrap value, rewards, terminals, 0.9).
     """
     # terminals given as a plain ``range`` instead of an ndarray.
     with pytest.raises(AssertionError):
         compute_returns(0.0, np.arange(5), range(5), 0.9)
     # rewards given as a plain ``range`` instead of an ndarray.
     with pytest.raises(AssertionError):
         compute_returns(0.0, range(5), np.arange(5), 0.9)
     # Scalar bootstrap value combined with 2-D rewards/terminals
     # (presumably rejected because the bootstrap must then be an
     # ndarray -- confirm against compute_returns).
     with pytest.raises(AssertionError):
         compute_returns(0.0, np.ones((5, 5)), np.ones((5, 5)), 0.9)