def test_step_auto(self, auto_act_mock, act_mock, ai_auto_act_mock):
     """Check the env after ``_step_auto`` when the mocked action is 45.

     The three mock arguments are presumably injected by ``patch``
     decorators that are not visible here -- TODO confirm which
     callables they replace.
     """
     auto_act_mock.return_value = True
     act_mock.return_value = 45
     ai_auto_act_mock.return_value = False

     self.env._step_auto()

     # Expected: action 45 is no longer available and the state has been
     # advanced by exactly that one action.
     expected_space = RenjuSpace(15 * 15)
     expected_space.remove(45)
     expected_state = RenjuState(RenjuBoard(15), PlayerColor.WHITE,
                                 PlayerColor.BLACK).act(45)

     self.assertIsInstance(self.env._policies[PlayerColor.BLACK],
                           InputPolicy)
     self.assertIsInstance(self.env._policies[PlayerColor.WHITE],
                           RandomPolicy)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected_space, self.env.action_space)
     self.assertEqual(expected_state, self.env._state)
 def test_reset(self, act_mock):
     """Check the env and returned observation after ``_reset``.

     ``act_mock`` is presumably injected by a ``patch`` decorator that is
     not visible here -- TODO confirm which callable it replaces. It is
     configured to return action 100.
     """
     act_mock.return_value = 100

     observation = self.env._reset()

     # Expected: action 100 is no longer available and the state has been
     # advanced by exactly that one action.
     expected_space = RenjuSpace(15 * 15)
     expected_space.remove(100)
     expected_state = RenjuState(RenjuBoard(15), PlayerColor.WHITE,
                                 PlayerColor.BLACK).act(100)
     # Flat observation of 225 cells, all zero except a 1 at index 100.
     expected_observation = np.zeros(15 * 15, dtype=int)
     expected_observation[100] = 1

     # This test expects BLACK to be the random policy and WHITE the input
     # policy after the reset.
     self.assertIsInstance(self.env._policies[PlayerColor.BLACK],
                           RandomPolicy)
     self.assertIsInstance(self.env._policies[PlayerColor.WHITE],
                           InputPolicy)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected_space, self.env.action_space)
     self.assertEqual(expected_state, self.env._state)
     # array_equal also reports a shape mismatch as a plain failure rather
     # than raising from an ambiguous element-wise comparison.
     self.assertTrue(np.array_equal(expected_observation, observation))
 def test_init(self):
     """Check policies, action space and state of a newly built env.

     The env is constructed with players ``['input', 'random']``, size 15
     and ``True`` as the third positional argument.
     """
     players = ['input', 'random']
     env = RenjuEnv(players, 15, True)

     self.assertIsInstance(env._policies[PlayerColor.BLACK], InputPolicy)
     self.assertIsInstance(env._policies[PlayerColor.WHITE], RandomPolicy)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(RenjuSpace(15 * 15), env.action_space)
     self.assertEqual(
         RenjuState(RenjuBoard(15), PlayerColor.WHITE, PlayerColor.BLACK),
         env._state)
 def test_render_not_change_state(self, finish_mock):
     """Check that calling ``_render`` leaves the env at its expected values.

     ``finish_mock`` is presumably injected by a ``patch`` decorator that
     is not visible here -- TODO confirm which callable it replaces. It is
     configured to return ``True``.
     """
     finish_mock.return_value = True

     self.env._render()

     self.assertIsInstance(self.env._policies[PlayerColor.BLACK],
                           InputPolicy)
     self.assertIsInstance(self.env._policies[PlayerColor.WHITE],
                           RandomPolicy)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(RenjuSpace(15 * 15), self.env.action_space)
     self.assertEqual(
         RenjuState(RenjuBoard(15), PlayerColor.WHITE, PlayerColor.BLACK),
         self.env._state)
 def test_rewards_finished(self, finsih_mock, get_reward_mock,
                           get_opponent_reward_mock):
     """Check ``get_rewards`` output and that the env is left untouched.

     The mock arguments are presumably injected by ``patch`` decorators
     that are not visible here -- TODO confirm which callables they
     replace.
     """
     finsih_mock.return_value = True
     get_reward_mock.return_value = 10.0
     get_opponent_reward_mock.return_value = -5.0

     rewards = self.env.get_rewards()

     # The opponent-reward mock value is expected first, then the reward
     # mock value.
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual([-5.0, 10.0], rewards)

     # Also test that the state not changed
     self.assertIsInstance(self.env._policies[PlayerColor.BLACK],
                           InputPolicy)
     self.assertIsInstance(self.env._policies[PlayerColor.WHITE],
                           RandomPolicy)
     self.assertEqual(RenjuSpace(15 * 15), self.env.action_space)
     self.assertEqual(
         RenjuState(RenjuBoard(15), PlayerColor.WHITE, PlayerColor.BLACK),
         self.env._state)
 def test_step_valid(self, ai_act_mock):
     """Check the result of ``_step(79)`` when the mocked reply is 191.

     ``ai_act_mock`` is presumably injected by a ``patch`` decorator that
     is not visible here -- TODO confirm which callable it replaces.
     """
     ai_act_mock.return_value = 191

     (actual_observation, actual_reward, actual_is_finish,
      actual_state_dict) = self.env._step(79)

     # Expected: both actions are gone from the space and the state has
     # been advanced by 79 and then 191.
     expected_space = RenjuSpace(15 * 15)
     expected_space.remove(79)
     expected_space.remove(191)
     expected_state = RenjuState(RenjuBoard(15), PlayerColor.WHITE,
                                 PlayerColor.BLACK).act(79).act(191)
     # Flat observation of 225 cells: 1 at index 79, 2 at index 191.
     expected_observation = np.zeros(15 * 15, dtype=int)
     np.put(expected_observation, [79, 191], [1, 2])

     self.assertIsInstance(self.env._policies[PlayerColor.BLACK],
                           InputPolicy)
     self.assertIsInstance(self.env._policies[PlayerColor.WHITE],
                           RandomPolicy)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(expected_space, self.env.action_space)
     self.assertEqual(expected_state, self.env._state)
     # array_equal also reports a shape mismatch as a plain failure rather
     # than raising from an ambiguous element-wise comparison.
     self.assertTrue(
         np.array_equal(expected_observation, actual_observation))
     self.assertEqual(0, actual_reward)
     self.assertEqual(False, actual_is_finish)
     self.assertEqual({'state': expected_state}, actual_state_dict)
Exemple #7
0
 def generate(self, space_size: int) -> DiscreteSpace:
     """Return a new ``RenjuSpace`` constructed from ``space_size``."""
     renju_space = RenjuSpace(space_size)
     return renju_space