def test_compute_probab_ratios(self):
    """Check ``ppo.compute_probab_ratios`` against hand-computed ratios.

    Both probability tables have shape (2, 5, 4) and are passed to the
    function as natural logarithms; ``actions`` and ``mask`` have shape
    (2, 4), so the fifth row of each table has no matching action.

    For every position where ``mask`` is 1 the expected value is
    ``new[b, t, a] / old[b, t, a]`` with ``a = actions[b, t]``; positions
    where ``mask`` is 0 are expected to be exactly 0.

    NOTE(review): SOURCE contains a second method with this exact name
    that feeds raw (non-log) values in ``(p_old, p_new)`` order. If both
    sit in the same class the later definition shadows the earlier one --
    confirm which calling convention ``ppo.compute_probab_ratios`` uses.
    """
    old_table = np.array([
        [
            [0.1, 0.2, 0.6, 0.1],
            [0.4, 0.1, 0.4, 0.1],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.1, 0.4, 0.4],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
            [0.3, 0.1, 0.5, 0.1],
        ],
    ])
    new_table = np.array([
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.4, 0.1, 0.1, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.1, 0.6],
        ],
        [
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.1, 0.2, 0.6],
            [0.3, 0.1, 0.3, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.2, 0.1, 0.6],
        ],
    ])

    # The function under test receives logarithms, not the raw tables.
    log_old = np.log(old_table)
    log_new = np.log(new_table)

    chosen_actions = np.array([
        [1, 2, 0, 1],
        [0, 3, 3, 0],
    ])
    step_mask = np.array([
        [1, 1, 0, 0],
        [1, 1, 1, 0],
    ])

    # Written as new / old for the chosen action; zeros where masked out.
    expected = np.array([
        [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
        [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
    ])

    # Argument order here is (new, old, actions, mask).
    ratios = ppo.compute_probab_ratios(
        log_new, log_old, chosen_actions, step_mask)

    self.assertAllClose(expected, ratios)
def test_compute_probab_ratios(self):
    """Check ``ppo.compute_probab_ratios`` against hand-computed ratios.

    Both probability tables have shape (2, 4, 4) and are passed to the
    function as raw values (no logarithm); ``actions`` and ``mask`` have
    shape (2, 4).

    For every position where ``mask`` is 1 the expected value is
    ``new[b, t, a] / old[b, t, a]`` with ``a = actions[b, t]``; positions
    where ``mask`` is 0 are expected to be exactly 0.

    NOTE(review): SOURCE contains another method with this exact name
    that feeds log-space values in ``(p_new, p_old)`` order. If both sit
    in the same class the later definition shadows the earlier one --
    confirm which calling convention ``ppo.compute_probab_ratios`` uses.
    """
    chosen_actions = np.array([
        [1, 2, 0, 1],
        [0, 3, 3, 0],
    ])
    step_mask = np.array([
        [1, 1, 0, 0],
        [1, 1, 1, 0],
    ])

    old_table = np.array([
        [
            [0.1, 0.2, 0.6, 0.1],
            [0.4, 0.1, 0.4, 0.1],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
        ],
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.1, 0.4, 0.4],
            [0.3, 0.1, 0.5, 0.1],
            [0.1, 0.2, 0.6, 0.1],
        ],
    ])
    new_table = np.array([
        [
            [0.3, 0.1, 0.5, 0.1],
            [0.4, 0.1, 0.1, 0.3],
            [0.1, 0.2, 0.1, 0.6],
            [0.3, 0.1, 0.5, 0.1],
        ],
        [
            [0.1, 0.2, 0.1, 0.6],
            [0.1, 0.1, 0.2, 0.6],
            [0.3, 0.1, 0.3, 0.3],
            [0.1, 0.2, 0.1, 0.6],
        ],
    ])

    # Written as new / old for the chosen action; zeros where masked out.
    expected = np.array([
        [0.1 / 0.2, 0.1 / 0.4, 0.0, 0.0],
        [0.1 / 0.3, 0.6 / 0.4, 0.3 / 0.1, 0.0],
    ])

    # Argument order here is (old, new, actions, mask).
    ratios = ppo.compute_probab_ratios(
        old_table, new_table, chosen_actions, step_mask)

    self.assertAllClose(expected, ratios)