def test_start(self):
        """Check a freshly built environment and the result of one step.

        A new ``TicTacToeEnv(strict=True)`` must not be done and must expose
        more than 7 open spaces. After stepping into the first open space,
        the returned observation must have exactly two fewer open spaces,
        the reward must be 0 and the episode must not be finished.
        """
        env = ttt_env.TicTacToeEnv(strict=True)
        self.assertFalse(env.done)

        # The env may already have moved once, so 8 or 9 cells are free.
        open_before = ttt_env.get_open_spaces(env.board_state)
        count_before = len(open_before)
        self.assertGreater(count_before, 7)

        # Take the first free cell as our move.
        first_move = open_before[0]
        observation, reward, done, unused_info = env.step(first_move)

        # Our move plus the env's reply removes two free cells.
        open_after = ttt_env.get_open_spaces(observation)
        self.assertEqual(count_before - 2, len(open_after))

        # With at most 3 moves on the board nobody can have won yet.
        self.assertEqual(reward, 0)
        self.assertFalse(done)
    def test_keep_playing(self):
        """Play sampled actions until the episode ends, then check the outcome.

        Builds a ``TicTacToeEnv(strict=False)`` and keeps stepping with
        actions drawn from ``action_space.sample()`` until ``step`` reports
        done. At that point the last reward must be 1 or -1, or the board
        must have no open spaces left.
        """
        env = ttt_env.TicTacToeEnv(strict=False)

        while True:
            # Sampled blindly: the chosen cell may already be occupied, in
            # which case the step could be a no-op.
            move = env.action_space.sample()
            unused_observation, reward, done, unused_info = env.step(move)
            if done:
                break

        # The episode ended, so one of these must hold:
        #   - we won        (reward == 1)
        #   - the env won   (reward == -1)
        #   - the board is full
        board_has_room = bool(ttt_env.get_open_spaces(env.board_state))
        outcomes = (reward == 1, reward == -1, not board_has_room)
        self.assertTrue(any(outcomes))