class BasicMctsModelTest(unittest.TestCase): def setUp(self): self.env = TicTacToeEnv() self.dynamics_model = BasicMctsModel(self.env) def test_get_predicted_value_and_final_info_discounted(self): self.dynamics_model = BasicMctsModel(self.env, discount=0.9) # Check some conditions first. states = [0] * 9 states[4] = 1 states[0] = 4 self.assertEqual((False, 0.0), self.env.check(states)) self.assertEqual(1, self.env.opponent_play(states)) predicted_value, final_info = self.dynamics_model.get_predicted_value_and_final_info( states) self.assertEqual(-.9, predicted_value) # Game ended for an illegal move. self.assertEqual([4, 4, 0, 0, 1, 1, 0, 0, 0], final_info[0]) # states self.assertEqual(5, final_info[1]) # action def test_get_predicted_value_and_final_info(self): # Check some conditions first. states = [0] * 9 states[4] = 1 states[0] = 4 self.assertEqual((False, 0.0), self.env.check(states)) self.assertEqual(1, self.env.opponent_play(states)) predicted_value, final_info = self.dynamics_model.get_predicted_value_and_final_info( states) self.assertEqual(-1., predicted_value) # Game ended for an illegal move. self.assertEqual([4, 4, 0, 0, 1, 1, 0, 0, 0], final_info[0]) # states self.assertEqual(5, final_info[1]) # action def test_step(self): self.dynamics_model.step([0] * 9, 0) # The above is a simulation step, so it should not affect the real environment. self.assertEqual([0] * 9, self.env.get_states())
class TicTacToeEnvTest(unittest.TestCase): def setUp(self): self.env = TicTacToeEnv() def test_check(self): self.assertEqual((False, 0.), self.env.check([0] * 8 + [1])) self.assertEqual((True, -1.), self.env.check([4, 4, 4, 0, 1, 1, 0, 0, 0])) self.assertEqual((True, 1.), self.env.check([4, 4, 0, 1, 1, 1, 0, 0, 0])) self.assertEqual((False, 0.), self.env.check([4, 4, 1, 1, 4, 1, 1, 0, 0])) self.assertEqual((True, 0.), self.env.check([4, 1, 4, 1, 4, 1, 1, 4, 1])) def test_legal_actions(self): states = [0] * 9 states[3] = 1 states[7] = 4 states[8] = 1 self.assertEqual([0, 1, 2, 4, 5, 6], self.env.legal_actions(states)) def test_opponent_play(self): # Chooses the first available space. self.assertEqual(0, self.env.opponent_play([0] * 8 + [1])) self.assertEqual(8, self.env.opponent_play([1] * 8 + [0])) def test_opponent_play_random(self): self.env = TicTacToeEnv(r_seed=0, use_random=True) s = set() for i in range(100): s.add(self.env.opponent_play([0, 1, 4, 0, 0, 0, 0, 0, 0])) self.assertEqual([0] + list(range(3, 9)), list(s)) def test_step(self): self.env.set_states([4, 4, 0, 0, 1, 1, 0, 0, 0]) states, is_final, reward = self.env.step(3) self.assertEqual([4, 4, 0, 1, 1, 1, 0, 0, 0], states) self.assertTrue(is_final) self.assertEqual(1., reward)