Beispiel #1
0
 def test_evaluate_episode_delta(self):
     """Sample 100 episodes and sanity-check the delta reported for each.

     Every delta returned by ``evaluate_episode_delta`` must lie in the
     closed interval [-1, 1], and at least one of the collected deltas
     must be truthy (i.e. non-zero for a plain number).
     """
     learner = MonteCarlo()
     observed = []
     for _ in range(100):
         episode = learner.generate_episode()
         delta = learner.evaluate_episode_delta(episode)
         observed.append(delta)
         # Bound check on the delta that was just recorded.
         self.assertTrue(-1 <= delta <= 1)
     # At least one episode must have produced a non-zero delta.
     self.assertTrue(any(observed))
     print('sample path value max deltas:', observed)
Beispiel #2
0
 def test_generate_episode(self):
     """Generate ten episodes and check the last state of each one.

     The final state of every generated episode is asserted to be
     strictly less than ``hash(MC.board)``; the collected paths are
     printed for manual inspection.
     """
     learner = MonteCarlo()
     episodes = []
     for _ in range(10):
         episode = learner.generate_episode()
         final_state = episode[-1]
         # NOTE(review): hash(board) presumably acts as an upper bound on
         # valid state ids -- confirm against the board implementation.
         self.assertLess(final_state, hash(learner.board))
         episodes.append(episode)
     print('sample state paths:', episodes)
Beispiel #3
0
 def test_evaluate_episode(self):
     """Check that evaluating an episode bumps each path state's visit count.

     For ten generated episodes, the visit counts and values of state 0
     plus every state on the episode path are snapshotted before and after
     ``evaluate_episode``. The value differences (before minus after) are
     printed for inspection, and every visit count is asserted to have
     grown by exactly one.

     The previous assertion, ``np.all(diff_visits) == 1``, reduced the
     array to a single bool first and compared *that* to 1, so it only
     verified that each difference was non-zero.
     """
     MC = MonteCarlo()
     for _ in range(10):
         states = MC.generate_episode()
         # State 0 is prepended so it is checked alongside the path states.
         path = [0] + states
         pre_visits = np.array([MC.visits[s] for s in path])
         pre_values = np.array([MC.values[s] for s in path])
         MC.evaluate_episode(states)
         post_visits = np.array([MC.visits[s] for s in path])
         post_values = np.array([MC.values[s] for s in path])
         diff_values = pre_values - post_values
         print('sample path value deltas:', diff_values)
         # Compare element-wise *before* reducing with np.all.
         # NOTE(review): assumes a path never revisits a state, so each
         # count rises by exactly one per episode -- confirm.
         visit_increments = post_visits - pre_visits
         self.assertTrue(
             np.all(visit_increments == 1),
             msg='unexpected visit increments: %s' % (visit_increments,),
         )