Exemple #1
0
    def setUp(self):
        """Create a fresh 2-player 3x3 engine and the move fixtures for the tests."""
        self.no_of_players = 2
        self.board_size = 3
        self.marks_required = 3
        self.engine = TicTacToeEngine(
            self.no_of_players,
            self.board_size,
            self.marks_required,
        )

        def as_actions(cells):
            # Turn (row, col) pairs into engine actions, keeping their order.
            return [TicTacToeAction(r, c) for r, c in cells]

        side = range(self.board_size)

        # A few opening moves that leave the game undecided.
        self.partial_move_sequence = as_actions([(0, 0), (1, 1), (2, 1)])
        # Every cell of the board, in row-major order.
        self.full_move_sequence = as_actions(product(side, side))
        # Sequences named after the player expected to win once they are played.
        self.player_0_winning_move_sequence = as_actions(
            [(2, 0), (0, 1), (1, 1), (0, 0), (0, 2)])
        self.player_1_winning_move_sequence = as_actions(
            [(2, 2), (0, 1), (2, 0), (0, 0), (1, 1), (0, 2)])
Exemple #2
0
class TestTicTacToeEngine(TestCase):
    """Exercise ``TicTacToeEngine`` with 3 players on a 5x5 board.

    Three marks in a line are required to win. The move fixtures built in
    ``setUp`` are played in turn order (player 0, 1, 2, 0, ...), which is the
    rotation verified by ``test_make_move``.

    Equality checks use ``assertEqual`` so that a failing test reports both
    compared values instead of a bare "False is not true".
    """

    def setUp(self):
        """Create a fresh engine and the move sequences shared by the tests."""
        self.no_of_players = 3
        self.board_size = 5
        self.marks_required = 3
        self.engine = TicTacToeEngine(self.no_of_players, self.board_size,
                                      self.marks_required)

        # Opening moves that leave the game undecided.
        self.partial_move_sequence = self._as_actions(
            [(0, 0), (1, 0), (2, 0), (0, 1), (1, 4), (2, 3)])
        # Every cell of the board, in row-major order.
        self.full_move_sequence = self._as_actions(
            product(range(self.board_size), range(self.board_size)))
        # Each sequence ends with the named player completing a line of three.
        self.player_0_winning_move_sequence = self._as_actions(
            [(0, 0), (1, 0), (2, 0), (0, 1), (1, 4), (2, 3), (0, 2)])
        self.player_1_winning_move_sequence = self._as_actions(
            [(4, 4), (2, 1), (4, 3), (4, 2), (2, 2), (4, 1), (3, 1), (2, 3)])
        self.player_2_winning_move_sequence = self._as_actions(
            [(0, 0), (0, 1), (4, 4), (0, 2), (0, 3), (3, 3), (0, 4), (2, 1),
             (2, 2)])

    @staticmethod
    def _as_actions(cells):
        """Convert ``(row, col)`` pairs to ``TicTacToeAction`` objects, in order."""
        return [TicTacToeAction(row, col) for row, col in cells]

    def _expected_rewards(self, winner=None):
        """Return the rewards dict expected from the engine.

        With ``winner=None`` every player maps to 0; otherwise the winner maps
        to 1 and every other player to -1.
        """
        rewards = {}
        for index in range(self.no_of_players):
            if winner is None:
                value = 0
            else:
                value = 1 if index == winner else -1
            rewards[Player(f"Player {index}", index)] = value
        return rewards

    def _assert_single_winning(self, winner, cells):
        """Assert the engine reports exactly one winning: ``winner`` on ``cells``."""
        self.assertEqual(len(self.engine.winnings), 1)
        self.assertEqual(self.engine.winnings, (Winning(winner, cells), ))

    def init_board(self, actions):
        """Play ``actions`` on the engine in the given order."""
        for action in actions:
            self.engine.make_move(action)

    def test_proper_initialization(self):
        """A new (or freshly reset) engine is empty, neutral and not ended."""
        self.assertFalse(self.engine.winnings)
        self.assertEqual(self.engine.players, (Player("Player 0", 0),
                                               Player("Player 1", 1),
                                               Player("Player 2", 2)))
        self.assertEqual(self.engine.current_player, Player("Player 0", 0))
        # Unoccupied cells are represented by -1.
        self.assertTrue(
            np.array_equal(self.engine.current_state.board,
                           np.full((5, 5), -1)))
        self.assertEqual(self.engine.allowed_actions,
                         TicTacToeActionSpace(set(self.full_move_sequence)))
        self.assertEqual(self.engine.rewards, self._expected_rewards())
        self.assertFalse(self.engine.ended)

    def test_improper_initialization(self):
        """Invalid constructor arguments are rejected with AssertionError."""
        invalid_configs = (
            ((1, 5, 5), "wrong no of players"),
            ((2, 1, 5), "wrong board_size"),
            ((2, 5, 6), "wrong marks_required"),
        )
        for args, reason in invalid_configs:
            with self.assertRaises(AssertionError, msg=reason):
                TicTacToeEngine(*args)

    def test_make_move(self):
        """Each move marks the board and passes the turn to the next player."""
        moves = (
            # (row, col), player making the move, player to move afterwards
            ((0, 0), 0, 1),
            ((0, 1), 1, 2),
            ((3, 3), 2, 0),
        )
        for (row, col), mover, next_player in moves:
            self.engine.make_move(TicTacToeAction(row, col))
            self.assertEqual(self.engine.current_state.board[row][col], mover)
            self.assertEqual(self.engine.current_player,
                             Player(f"Player {next_player}", next_player))
            self.assertFalse(self.engine.winnings)

        self.assertEqual(self.engine.rewards, self._expected_rewards())

    def test_make_move_fail(self):
        """Occupied cells and off-board coordinates are rejected."""
        self.init_board(self.partial_move_sequence)

        # (2, 3) was already taken by the last move of the partial sequence.
        occupied = TicTacToeAction(2, 3)
        with self.assertRaises(IllegalMoveError):
            self.engine.make_move(occupied)

        # (5, 5) lies outside the 5x5 board.
        off_board = TicTacToeAction(5, 5)
        with self.assertRaises(IndexError):
            self.engine.make_move(off_board)

    def test_winnings_and_ended(self):
        """A completed line ends the game and rewards the winner only."""
        self.init_board(self.player_0_winning_move_sequence)
        self._assert_single_winning(0, [(0, 0), (0, 1), (0, 2)])
        self.assertTrue(self.engine.ended)
        self.assertEqual(self.engine.rewards, self._expected_rewards(winner=0))

        self.engine.reset()

        self.init_board(self.player_1_winning_move_sequence)
        self._assert_single_winning(1, [(2, 1), (2, 2), (2, 3)])
        self.assertTrue(self.engine.ended)
        self.assertEqual(self.engine.rewards, self._expected_rewards(winner=1))

        self.engine.reset()

        self.init_board(self.player_2_winning_move_sequence)
        self._assert_single_winning(2, [(2, 2), (3, 3), (4, 4)])
        self.assertTrue(self.engine.ended)
        self.assertEqual(self.engine.rewards, self._expected_rewards(winner=2))

    def test_allowed_actions_normally(self):
        """Allowed actions are all cells minus the ones already played."""
        self.init_board(self.partial_move_sequence)
        remaining = set(self.full_move_sequence) - set(
            self.partial_move_sequence)
        self.assertEqual(self.engine.allowed_actions,
                         TicTacToeActionSpace(remaining))

    def test_allowed_actions_with_no_actions(self):
        """No action is allowed once every cell has been played."""
        self.init_board(self.full_move_sequence)
        self.assertFalse(self.engine.allowed_actions.actions)

    def test_reset(self):
        """``reset`` restores the initial state after a full or a won board."""
        self.init_board(self.full_move_sequence)
        self.engine.reset()
        self.test_proper_initialization()

        self.init_board(self.player_0_winning_move_sequence)
        self.engine.reset()
        self.test_proper_initialization()

    def test_randomize(self):
        """``randomize`` yields a non-empty position that is still undecided."""
        self.engine.randomize()

        self.assertFalse(self.engine.winnings)
        self.assertFalse(self.engine.ended)
        self.assertTrue(self.engine._board._points_placed)
        self.assertTrue(self.engine._board._marks_placed)

    def test_set_proper_player(self):
        """``_rewind_to_player`` selects the given player (``None`` -> player 0)."""
        for target, expected in ((None, 0), (0, 0), (1, 1), (2, 2)):
            self.engine._rewind_to_player(target)
            self.assertEqual(self.engine.current_player,
                             Player(f"Player {expected}", expected))

    def test_remove_winning(self):
        """Removing a cell of the winning line clears the recorded winning."""
        self.init_board(self.player_0_winning_move_sequence)
        self._assert_single_winning(0, [(0, 0), (0, 1), (0, 2)])
        self.engine._remove_winning(0, (0, 1))

        self.assertFalse(self.engine.winnings)

        self.engine.reset()

        self.init_board(self.player_1_winning_move_sequence)
        self._assert_single_winning(1, [(2, 1), (2, 2), (2, 3)])
        self.assertTrue(self.engine.ended)
        self.engine._remove_winning(1, (2, 2))

        self.assertFalse(self.engine.winnings)

    def test_undo_last_move(self):
        """Undoing a move restores the previous player and un-ends the game."""
        player_0 = Player("Player 0", 0)
        player_1 = Player("Player 1", 1)
        player_2 = Player("Player 2", 2)

        # Undo an ordinary, non-winning move.
        self.engine.make_move(TicTacToeAction(0, 0))
        self.assertEqual(self.engine.current_player, player_1)
        self.assertFalse(self.engine.winnings)
        self.assertFalse(self.engine.ended)

        self.engine._undo_last_move()

        self.assertEqual(self.engine.current_player, player_0)
        self.assertFalse(self.engine.winnings)
        self.assertFalse(self.engine.ended)

        # Undo player 0's winning move.
        self.init_board(self.player_0_winning_move_sequence)
        self.assertEqual(self.engine.current_player, player_1)
        self._assert_single_winning(0, [(0, 0), (0, 1), (0, 2)])
        self.assertTrue(self.engine.ended)

        self.engine._undo_last_move()

        self.assertEqual(self.engine.current_player, player_0)
        self.assertFalse(self.engine.winnings)
        self.assertFalse(self.engine.ended)

        self.engine.reset()

        # Undo player 2's winning move; the turn wraps back from player 0.
        self.init_board(self.player_2_winning_move_sequence)
        self.assertEqual(self.engine.current_player, player_0)
        self._assert_single_winning(2, [(2, 2), (3, 3), (4, 4)])
        self.assertTrue(self.engine.ended)

        self.engine._undo_last_move()

        self.assertEqual(self.engine.current_player, player_2)
        self.assertFalse(self.engine.winnings)
        self.assertFalse(self.engine.ended)
Exemple #3
0
class TestTicTacToeEngineAdditional(TestTicTacToeEngine):
    """Re-run the engine tests for 2 players on a 3x3 board (3 marks to win).

    Tests that hard-code the 3-player expectations of the base class are
    overridden here; the remaining ones are inherited unchanged and run
    against the fixtures built in this class's ``setUp``.

    Equality checks use ``assertEqual`` so that a failing test reports both
    compared values instead of a bare "False is not true".
    """

    def setUp(self):
        """Create a fresh 2-player 3x3 engine and its move fixtures."""
        self.no_of_players = 2
        self.board_size = 3
        self.marks_required = 3
        self.engine = TicTacToeEngine(self.no_of_players, self.board_size,
                                      self.marks_required)

        def as_actions(cells):
            # Turn (row, col) pairs into engine actions, keeping their order.
            return [TicTacToeAction(row, col) for row, col in cells]

        # Opening moves that leave the game undecided.
        self.partial_move_sequence = as_actions([(0, 0), (1, 1), (2, 1)])
        # Every cell of the board, in row-major order.
        self.full_move_sequence = as_actions(
            product(range(self.board_size), range(self.board_size)))
        # Player 0 completes the anti-diagonal with the last move.
        self.player_0_winning_move_sequence = as_actions(
            [(2, 0), (0, 1), (1, 1), (0, 0), (0, 2)])
        # Player 1 completes the top row with the last move.
        self.player_1_winning_move_sequence = as_actions(
            [(2, 2), (0, 1), (2, 0), (0, 0), (1, 1), (0, 2)])

    def test_proper_initialization(self):
        """A new (or freshly reset) engine is empty, neutral and not ended."""
        player_0 = Player("Player 0", 0)
        player_1 = Player("Player 1", 1)

        self.assertFalse(self.engine.winnings)
        self.assertEqual(self.engine.players, (player_0, player_1))
        self.assertEqual(self.engine.current_player, player_0)
        # Unoccupied cells are represented by -1.
        self.assertTrue(
            np.array_equal(self.engine.current_state.board,
                           np.full((3, 3), -1)))
        self.assertEqual(self.engine.allowed_actions,
                         TicTacToeActionSpace(set(self.full_move_sequence)))
        self.assertEqual(self.engine.rewards, {player_0: 0, player_1: 0})
        self.assertFalse(self.engine.ended)

    def test_make_move(self):
        """Each move marks the board and passes the turn to the other player."""
        player_0 = Player("Player 0", 0)
        player_1 = Player("Player 1", 1)

        self.engine.make_move(TicTacToeAction(2, 2))  # Player 0
        self.assertEqual(self.engine.current_state.board[2][2], 0)
        self.assertEqual(self.engine.current_player, player_1)
        self.assertFalse(self.engine.winnings)

        self.engine.make_move(TicTacToeAction(1, 1))  # Player 1
        self.assertEqual(self.engine.current_state.board[1][1], 1)
        self.assertEqual(self.engine.current_player, player_0)
        self.assertFalse(self.engine.winnings)

        self.assertEqual(self.engine.rewards, {player_0: 0, player_1: 0})

    def test_make_move_fail(self):
        """Occupied cells and off-board coordinates are rejected."""
        self.init_board(self.partial_move_sequence)

        # (0, 0) was taken by the first move of the partial sequence.
        occupied = TicTacToeAction(0, 0)
        with self.assertRaises(IllegalMoveError):
            self.engine.make_move(occupied)

        # (5, 5) lies outside the 3x3 board.
        off_board = TicTacToeAction(5, 5)
        with self.assertRaises(IndexError):
            self.engine.make_move(off_board)

    def test_winnings_and_ended(self):
        """A completed line ends the game; winner gets 1, loser gets -1."""
        player_0 = Player("Player 0", 0)
        player_1 = Player("Player 1", 1)

        self.init_board(self.player_0_winning_move_sequence)
        self.assertEqual(len(self.engine.winnings), 1)
        self.assertEqual(self.engine.winnings,
                         (Winning(0, [(0, 2), (1, 1), (2, 0)]), ))
        self.assertTrue(self.engine.ended)
        self.assertEqual(self.engine.rewards, {player_0: 1, player_1: -1})

        self.engine.reset()

        self.init_board(self.player_1_winning_move_sequence)
        self.assertEqual(len(self.engine.winnings), 1)
        self.assertEqual(self.engine.winnings,
                         (Winning(1, [(0, 0), (0, 1), (0, 2)]), ))
        self.assertTrue(self.engine.ended)
        self.assertEqual(self.engine.rewards, {player_0: -1, player_1: 1})

    def test_set_proper_player(self):
        """``_rewind_to_player`` selects the given player (``None`` -> player 0)."""
        for target, expected in ((None, 0), (0, 0), (1, 1)):
            self.engine._rewind_to_player(target)
            self.assertEqual(self.engine.current_player,
                             Player(f"Player {expected}", expected))

    def test_remove_winning(self):
        # Deliberate no-op override: the inherited test expects the 5x5
        # winning lines of the base fixtures, which do not match this setUp.
        pass

    def test_undo_last_move(self):
        # Deliberate no-op override: the inherited test plays
        # ``player_2_winning_move_sequence``, which this setUp does not define.
        pass
 def setUp(self):
     """Prepare a 2-player 3x3 engine, the episode count and the agent file path."""
     self.engine = TicTacToeEngine(2, 3, 3)
     self.number_of_episodes = 10
     # Location of the agent file, relative to the project root.
     agent_path_parts = ("tests", "test_reinforcement_learning", "agent.ai")
     self.agent_path = os.path.join(ABS_PROJECT_ROOT_PATH, *agent_path_parts)
from training_platform import EnvironmentServer
from environments.tic_tac_toe.tic_tac_toe_engine import TicTacToeEngine

if __name__ == '__main__':
    # Host a 2-player, 3x3, three-in-a-row environment.
    server = EnvironmentServer(TicTacToeEngine(2, 3, 3))

    # Wait for the operator to confirm that all players have joined.
    input("Press ENTER after all players have joined")

    try:
        # Play 100 consecutive games on the same server.
        for game_number in range(100):
            print(f"Game number: {game_number}")
            server.start()
    finally:
        # Always release the server, even when a game raises or the run is
        # interrupted; previously shutdown() was skipped on any error.
        server.shutdown()
        print("Training platform has been shutdowned!")
Exemple #6
0
    def __init__(self, app, board_size, marks_required, player_mark,
                 opponent_mark, difficulty, game_mode):
        """Set up the actors, the game server, the opponent agent and the scene.

        Args:
            app: Application object; its ``screen`` is handed to the scene and
                its ``switch_music`` is called at the end of construction.
            board_size: Board size passed to ``TicTacToeEngine``.
            marks_required: Marks-in-a-line value passed to the engine.
            player_mark: Index into ``server.players`` for the local player.
            opponent_mark: Index into ``server.players`` for the agent
                opponent; also the ``player`` criterion of ``AgentsDB.load``.
            difficulty: Stored only; not applied to the agent yet (see the
                TODO below).
            game_mode: ``GameMode.AgentVsAgent`` turns on spectator mode.

        Raises:
            ValueError: If the agents database has no agent matching the
                opponent mark, board size and marks required.
        """
        self._app = app
        self._board_size = board_size
        self.marks_required = marks_required
        self._player_mark = player_mark
        self._opponent_mark = opponent_mark
        self._difficulty = difficulty
        self.spectator_mode = (game_mode == GameMode.AgentVsAgent)

        # Fake player initialisation in spectator mode: an agent loaded from
        # file plays under the local player's mark.
        if self.spectator_mode:
            self.show_match_ended = False
            self.show_match_paused = False
            self.next_moves_to_show = 0
            self._fake_player_commands_queue = init_agent_fake_player()
            self._fake_player_agent = BaseAgent.load(
                resolve_agent_file_path(self._player_mark, self._board_size,
                                        self.marks_required))

        self.asys = ActorSystem(ACTOR_SYSTEM_BASE)

        # TicTacToeClientActor initialization; the helper actors are created
        # under global names and their addresses are sent to the client actor.
        self.client_actor_address = self.asys.createActor(TicTacToeClientActor)
        self.game_manager_addr = self.asys.createActor(
            GameManager, globalName="GameManager")
        self.match_maker_addr = self.asys.createActor(MatchMaker,
                                                      globalName="MatchMaker")
        self.logger_addr = self.asys.createActor(Logger, globalName="Logger")
        msg = InitTTTClientActorMsg(self.match_maker_addr,
                                    self.game_manager_addr, self.logger_addr,
                                    self.spectator_mode)
        self.tell(self.client_actor_address, msg)

        # Training Platform initialization (always a 2-player game)
        engine = TicTacToeEngine(2, self._board_size, self.marks_required)
        self.server = EnvironmentServer(engine)
        self.log("Spawned server")
        players = self.server.players

        # TicTacToeClientActor server joining
        human_player = players[self._player_mark]
        self.tell(self.client_actor_address, JoinServerMsg(human_player))

        # Opponent joining
        agent_player = players[self._opponent_mark]
        matching_agents = AgentsDB.load(
            player=self._opponent_mark,
            board_size=self._board_size,
            marks_required=self.marks_required
        )  # List of all agents that satisfy criteria

        # TODO: Make convenient select agent function (maybe in GUI)
        # For now it's just last agent
        def agent_select(agents):
            if not agents:
                # Message fixed: it used to read "There are now agents ...",
                # which stated the opposite of the actual problem.
                raise ValueError(
                    "There are no agents satisfying given criteria in the Agents Database"
                )
            return agents[-1]

        agent = agent_select(matching_agents)

        # TODO: implement proper difficulty level handling
        # agent.epsilon = 0.0 if self._difficulty == Difficulty.HARD else 0.2
        agent_client = AgentClient(agent)
        self.server.join(agent_client, agent_player)
        self.log("Joined opponent")

        # Environment starting; non-blocking so that construction can continue.
        self.server.start(blocking=False)
        self.log("Started server")

        self._scene = TicTacToeScene(self, app, app.screen, self._board_size,
                                     self._player_mark, self._opponent_mark)
        self.turn = TurnState.YOUR_TURN
        self.winnings = None

        self._app.switch_music(
            os.path.join(
                ABS_PROJECT_ROOT_PATH,
                "game_app/resources/sounds/common/SneakyAdventure.mp3"))
Exemple #7
0
from reinforcement_learning.agents.dqn_agent.dqn_agent import DQNAgent

# Agents building blocks
from reinforcement_learning.agents.common_building_blocks.epsilon_strategy import ConstantEpsilonStrategy, CircleEpsilonStrategy, DecayingSinusEpsilonStrategy

# Training
from reinforcement_learning.simple_training import SimpleTraining

# Agents Database

# To avoid warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

if __name__ == '__main__':
    engine = TicTacToeEngine(2, 3, 3)

    # agents = [NStepAgent(n=5,
    #                      step_size=0.1,
    #                      epsilon_strategy=CircleEpsilonStrategy(starting_epsilon_value=0.1, exploration_part=0.7),
    #                      discount=1),
    #           DQNAgent(step_size=0.01,
    #                    discount=1,
    #                    epsilon_strategy=DecayingSinusEpsilonStrategy(starting_epsilon_value=0.1, exploration_part=0.7),
    #                    fit_period=64,
    #                    batch_size=64,
    #                    max_memory_size=64)]

    agents = [
        NStepAgent(n=5,
                   step_size=0.1,
import sys

from environments.tic_tac_toe.tic_tac_toe_engine import TicTacToeEngine
from training_platform.common import *
from training_platform import EnvironmentServer


if __name__ == '__main__':
    # Commandline parameters parsing: exactly three integer arguments are
    # expected after the script name.
    argc = len(sys.argv)
    if argc != 4:
        print(f"Invalid arguments number: {argc-1} (should be 3)")
        print("Try again with following arguments:")
        print("python start_server.py <no_of_players> <board_size> <marks_required> ")
        # Use sys.exit rather than the site-provided exit() helper, which is
        # meant for the interactive shell, and report the usage error with a
        # non-zero status instead of 0.
        sys.exit(2)
    no_of_players, board_size, marks_required = (
        int(argument) for argument in sys.argv[1:4])

    server = EnvironmentServer(
        TicTacToeEngine(no_of_players, board_size, marks_required))

    # Wait for the operator to confirm that all players have joined.
    input("Press ENTER after all players have joined")

    server.start()
    print("Episode has ended!")