def test_start(self):
    """Check a freshly built strict environment and the result of one step.

    Verifies that the new environment is not done, that at least eight
    squares are open, that stepping into the first open square leaves two
    fewer open squares in the returned observation, and that the step
    reports zero reward and an unfinished game.
    """
    env = ttt_env.TicTacToeEnv(strict=True)
    self.assertFalse(env.done)

    # The environment may already have made its own opening move, so a
    # fresh board is expected to expose either 8 or 9 open squares.
    open_before = ttt_env.get_open_spaces(env.board_state)
    open_count_before = len(open_before)
    self.assertGreater(open_count_before, 7)

    # Take the first available square as our move.
    first_open = open_before[0]
    observation, reward, done, _info = env.step(first_open)

    # Our move plus the environment's reply should consume two squares.
    open_after = ttt_env.get_open_spaces(observation)
    self.assertEqual(open_count_before - 2, len(open_after))

    # With at most three moves on the board nobody can have won yet.
    self.assertEqual(reward, 0)
    self.assertFalse(done)
def test_keep_playing(self):
    """Play randomly sampled actions until the environment reports done.

    Once the episode has finished, the final reward must be 1 (we won) or
    -1 (the environment won), or else the board must have no open squares.
    """
    env = ttt_env.TicTacToeEnv(strict=False)

    while True:
        # Actions are sampled blindly from the action space, so a sampled
        # square may already be occupied and the step may be a no-op.
        move = env.action_space.sample()
        _observation, reward, finished, _info = env.step(move)
        if finished:
            break

    # The episode is over, so one of the three terminal conditions holds.
    player_won = reward == 1
    opponent_won = reward == -1
    board_has_room = bool(ttt_env.get_open_spaces(env.board_state))
    self.assertTrue(player_won or opponent_won or not board_has_room)