def test_evaluate_episode_delta(self):
    """Check the deltas reported by ``evaluate_episode_delta``.

    Runs 100 generated episodes through ``MC.evaluate_episode_delta``.
    Each returned delta must lie in ``[-1, 1]``, and at least one of
    them must be truthy (non-zero).
    """
    mc = MonteCarlo()
    observed = []
    for _ in range(100):
        episode = mc.generate_episode()
        delta = mc.evaluate_episode_delta(episode)
        observed.append(delta)
        # Bound check on the delta that was just recorded.
        self.assertTrue(-1 <= delta <= 1)
    # A run where every delta is zero would mean nothing was ever updated.
    self.assertTrue(any(observed))
    print('sample path value max deltas:', observed)
def test_generate_episode(self):
    """Check the final state of generated episodes.

    Generates ten episodes and asserts that the last state of each one
    is strictly less than ``hash(MC.board)``, evaluated right after the
    episode was generated. All generated paths are printed at the end.
    """
    mc = MonteCarlo()
    sampled_paths = []
    for _ in range(10):
        path = mc.generate_episode()
        final_state = path[-1]
        # Read the board hash only after the episode has been generated.
        upper_bound = hash(mc.board)
        self.assertLess(final_state, upper_bound)
        sampled_paths.append(path)
    print('sample state paths:', sampled_paths)
def test_evaluate_episode(self):
    """Check that ``evaluate_episode`` bumps every tracked visit count by one.

    For each of ten generated episodes, ``MC.visits`` and ``MC.values``
    are snapshotted for key ``0`` plus every state on the path, both
    before and after ``MC.evaluate_episode(states)``. The value
    differences are printed, and every visit count is required to have
    grown by exactly one.

    NOTE(review): key ``0`` is presumably the initial (empty-board)
    state, which ``generate_episode`` does not include -- confirm.
    """
    MC = MonteCarlo()
    for _ in range(10):
        states = MC.generate_episode()
        tracked = [0] + states

        pre_visits = np.array([MC.visits[s] for s in tracked])
        pre_values = np.array([MC.values[s] for s in tracked])
        MC.evaluate_episode(states)
        post_visits = np.array([MC.visits[s] for s in tracked])
        post_values = np.array([MC.values[s] for s in tracked])

        diff_values = pre_values - post_values
        print('sample path value deltas:', diff_values)

        # The comparison must happen element-wise *inside* np.all().
        # The previous form, np.all(diff) == 1, reduced the array to a
        # single boolean first and so only verified "no entry is zero".
        visit_increments = post_visits - pre_visits
        self.assertTrue(
            np.all(visit_increments == 1),
            'visit count increments were %s, expected all 1'
            % (visit_increments,),
        )