def make_tictactoe(dim=1):
    """Register the tic-tac-toe env with RLlib and report its spaces.

    Builds one probe instance purely to read the observation/action
    spaces, then registers a creator that makes a fresh env per worker.

    Returns:
        (observation_space, action_space, "tictactoe-v0")
    """
    probe_env = MultiAgentTicTacToe(dim)
    register_env("tictactoe-v0", lambda _: MultiAgentTicTacToe(dim))
    return probe_env.observation_space, probe_env.action_space, "tictactoe-v0"
def test_not_move_on_turn_hd():
    """Passing on your own turn is penalized (high-dimension board)."""
    game = MultiAgentTicTacToe(8)
    state = game.reset()
    # Observation slot 9 holds whose turn it is; player 0 moves first.
    assert state[0][9] == 0
    # Position 9 appears to be the "no move" action — player 0 declines
    # to move on their own turn, so player 0 loses the exchange.
    state, rewards, dones, infos = game.step({0: [9, 2], 1: [9, 6]})
    assert rewards[0] == -1
    assert rewards[1] == 1
def test_out_of_turn_over_not_move():
    """Moving out of turn beats not moving at all."""
    game = MultiAgentTicTacToe()
    state = game.reset()
    # Player 0 should be first to act.
    assert state[0][9] == 0
    # Player 0 passes (position 9) while player 1 places out of turn;
    # the out-of-turn mover is the one penalized here.
    state, rewards, dones, infos = game.step({0: [9, 0], 1: [3, 0]})
    assert rewards[0] == 1
    assert rewards[1] == -1
def test_starting_state_hd():
    """Both players see an empty board and player 0 on move after reset."""
    game = MultiAgentTicTacToe(6)
    state = game.reset()
    for player in range(2):
        view = state[player]
        # 9 board cells plus one turn indicator.
        assert len(view) == 10
        # Every board cell starts empty for both players.
        assert all(view[cell] == 0 for cell in range(9))
        # Slot 9 is the turn indicator; player 0 starts.
        assert view[9] == 0
def test_place_high_dimension():
    """Pieces placed in the highest dimension show up scaled by dim."""
    k = 2
    game = MultiAgentTicTacToe(k)
    state = game.reset()
    # Player 0 moves first.
    assert state[0][9] == 0

    # Player 0 takes cell 4 in the top dimension; player 1 passes.
    state, rewards, dones, infos = game.step({0: [4, k - 1], 1: [9, k - 1]})
    for player in range(2):
        view = state[player]
        assert len(view) == 10
        # Cell 4 now carries player 0's mark scaled by the dimension.
        for cell in range(9):
            expected = 1 * k if cell == 4 else 0
            assert view[cell] == expected
        # Turn passes to player 1.
        assert view[9] == 1

    # Player 1 takes cell 5 in the top dimension; player 0 passes.
    state, rewards, dones, infos = game.step({0: [9, k - 1], 1: [5, k - 1]})
    for player in range(2):
        view = state[player]
        assert len(view) == 10
        for cell in range(9):
            if cell == 4:
                assert view[cell] == 1 * k
            elif cell == 5:
                assert view[cell] == 2 * k
            else:
                assert view[cell] == 0
        # Turn returns to player 0.
        assert view[9] == 0
def simulate_rollouts_tictactoe(trainers, dim, num_rollouts=20):
    """Play full games with the given trainers and tally player 0's outcomes.

    Args:
        trainers: One RLlib trainer per player, indexed by agent id.
        dim: Board dimension passed to ``MultiAgentTicTacToe``.
        num_rollouts: How many complete games to simulate (default 20,
            matching the previous hard-coded value).

    Side effects: prints per-step actions, renders the board, and prints
    a final tally keyed by player 0's terminal reward (-1 loss / 0 draw / 1 win).
    """
    env = MultiAgentTicTacToe(dim)
    total_score = {-1: 0, 0: 0, 1: 0}
    for rollout in range(num_rollouts):
        obs = env.reset()
        print("--Rollout" + str(rollout) + "--")
        print("First Player: " + str(obs[0][9]))
        done = False
        while not done:
            # One action per agent, each chosen by that agent's trainer.
            # (agent_id deliberately does not shadow the rollout counter.)
            actions = {
                agent_id: trainer.compute_action(
                    obs[agent_id], policy_id=policy_mapping_fn(agent_id)
                )
                for agent_id, trainer in enumerate(trainers)
            }
            # compute_action sometimes returns array-wrapped components;
            # unwrap each to its scalar. TODO: figure out why this is needed.
            actions = {k: [x[0] for x in v] for k, v in actions.items()}
            print(actions)
            obs, rewards, dones, infos = env.step(actions)
            done = dones['__all__']
            if done:
                total_score[rewards[0]] += 1
            # NOTE(review): rendering every step, as the flattened original
            # reads — confirm it wasn't meant to render only terminal states.
            env.render()
    print("player 0 outcomes: " + str(total_score))
def test_tie_game_hd():
    # NOTE(review): despite the name, player 0's moves (2, 4, 6) trace the
    # anti-diagonal and the asserts check a win, not a tie — confirm intent.
    game = MultiAgentTicTacToe(8)
    state = game.reset()
    # Player 0 moves first.
    assert state[0][9] == 0
    setup_moves = [
        {0: [2, 4], 1: [9, 2]},
        {0: [9, 7], 1: [1, 3]},
        {0: [4, 4], 1: [9, 4]},
        {0: [9, 6], 1: [3, 0]},
    ]
    for move in setup_moves:
        game.step(move)
    state, rewards, dones, infos = game.step({0: [6, 7], 1: [9, 0]})
    assert rewards[0] == 1
    assert rewards[1] == -1
def test_place_piece_on_other_piece():
    """Placing on an occupied cell loses the game for the offender."""
    game = MultiAgentTicTacToe()
    state = game.reset()
    # Player 0 is first to act.
    assert state[0][9] == 0
    # Player 0 claims cell 4.
    game.step({0: [4, 0], 1: [9, 0]})
    # Player 1 tries to place on the already-occupied cell 4 and loses.
    state, rewards, dones, infos = game.step({0: [9, 0], 1: [4, 0]})
    assert rewards[0] == 1
    assert rewards[1] == -1
def test_out_of_turn_over_another_peice_hd():
    """Out-of-turn move vs. placing on an occupied cell (dim=4)."""
    game = MultiAgentTicTacToe(4)
    state = game.reset()
    # Player 0 starts.
    assert state[0][9] == 0
    # Player 0 claims cell 4; player 1 passes.
    game.step({0: [4, 3], 1: [9, 3]})
    # Player 0 moves again out of turn while player 1 hits the taken cell;
    # player 0 takes the penalty here.
    state, rewards, dones, infos = game.step({0: [5, 3], 1: [4, 3]})
    assert rewards[0] == -1
    assert rewards[1] == 1
def test_diagonal_wins_hd():
    """Both diagonals are winning lines for player 0 on a dim=20 board."""
    # Each scenario: four alternating setup moves, then the winning move.
    scenarios = [
        # Main diagonal 0-4-8.
        (
            [
                {0: [0, 3], 1: [9, 3]},
                {0: [9, 5], 1: [1, 0]},
                {0: [4, 0], 1: [9, 3]},
                {0: [9, 6], 1: [3, 2]},
            ],
            {0: [8, 6], 1: [9, 3]},
        ),
        # Anti-diagonal 2-4-6.
        (
            [
                {0: [2, 6], 1: [9, 5]},
                {0: [9, 2], 1: [1, 8]},
                {0: [4, 3], 1: [9, 3]},
                {0: [9, 7], 1: [3, 4]},
            ],
            {0: [6, 2], 1: [9, 7]},
        ),
    ]
    for setup_moves, winning_move in scenarios:
        game = MultiAgentTicTacToe(20)
        state = game.reset()
        # Player 0 always opens.
        assert state[0][9] == 0
        for move in setup_moves:
            game.step(move)
        state, rewards, dones, infos = game.step(winning_move)
        assert rewards[0] == 1
        assert rewards[1] == -1
def test_horizontal_wins_hd():
    """Each of the three rows is a winning line for player 0 (dim=10)."""
    # Each scenario: four alternating setup moves, then the winning move.
    scenarios = [
        # Top row 0-1-2.
        (
            [
                {0: [0, 9], 1: [9, 9]},
                {0: [9, 3], 1: [4, 9]},
                {0: [1, 4], 1: [9, 9]},
                {0: [9, 2], 1: [5, 9]},
            ],
            {0: [2, 4], 1: [9, 2]},
        ),
        # Middle row 3-4-5.
        (
            [
                {0: [3, 5], 1: [9, 4]},
                {0: [9, 7], 1: [0, 7]},
                {0: [4, 2], 1: [9, 8]},
                {0: [9, 4], 1: [1, 2]},
            ],
            {0: [5, 4], 1: [9, 0]},
        ),
        # Bottom row 6-7-8.
        (
            [
                {0: [6, 4], 1: [9, 6]},
                {0: [9, 0], 1: [0, 3]},
                {0: [7, 6], 1: [9, 2]},
                {0: [9, 2], 1: [1, 3]},
            ],
            {0: [8, 2], 1: [9, 5]},
        ),
    ]
    for setup_moves, winning_move in scenarios:
        game = MultiAgentTicTacToe(10)
        state = game.reset()
        # Player 0 always opens.
        assert state[0][9] == 0
        for move in setup_moves:
            game.step(move)
        state, rewards, dones, infos = game.step(winning_move)
        assert rewards[0] == 1
        assert rewards[1] == -1
def test_vertical_wins_hd():
    """Each of the three columns is a winning line for player 0 (dim=5)."""
    # Each scenario: four alternating setup moves, then the winning move.
    scenarios = [
        # Left column 0-3-6.
        (
            [
                {0: [0, 4], 1: [9, 0]},
                {0: [9, 2], 1: [4, 2]},
                {0: [3, 4], 1: [9, 3]},
                {0: [9, 1], 1: [5, 0]},
            ],
            {0: [6, 2], 1: [9, 4]},
        ),
        # Middle column 1-4-7.
        (
            [
                {0: [1, 4], 1: [9, 0]},
                {0: [9, 0], 1: [3, 3]},
                {0: [4, 1], 1: [9, 3]},
                {0: [9, 0], 1: [5, 0]},
            ],
            {0: [7, 3], 1: [9, 4]},
        ),
        # Right column 2-5-8.
        (
            [
                {0: [2, 1], 1: [9, 2]},
                {0: [9, 0], 1: [3, 0]},
                {0: [5, 3], 1: [9, 5]},
                {0: [9, 0], 1: [4, 0]},
            ],
            {0: [8, 3], 1: [9, 3]},
        ),
    ]
    for setup_moves, winning_move in scenarios:
        game = MultiAgentTicTacToe(5)
        state = game.reset()
        # Player 0 always opens.
        assert state[0][9] == 0
        for move in setup_moves:
            game.step(move)
        state, rewards, dones, infos = game.step(winning_move)
        assert rewards[0] == 1
        assert rewards[1] == -1
def test_diagonal_wins():
    """Both diagonals are winning lines for player 0 (default board)."""
    # Each scenario: four alternating setup moves, then the winning move.
    scenarios = [
        # Main diagonal 0-4-8.
        (
            [
                {0: [0, 0], 1: [9, 0]},
                {0: [9, 0], 1: [1, 0]},
                {0: [4, 0], 1: [9, 0]},
                {0: [9, 0], 1: [3, 0]},
            ],
            {0: [8, 0], 1: [9, 0]},
        ),
        # Anti-diagonal 2-4-6.
        (
            [
                {0: [2, 0], 1: [9, 0]},
                {0: [9, 0], 1: [1, 0]},
                {0: [4, 0], 1: [9, 0]},
                {0: [9, 0], 1: [3, 0]},
            ],
            {0: [6, 0], 1: [9, 0]},
        ),
    ]
    for setup_moves, winning_move in scenarios:
        game = MultiAgentTicTacToe()
        state = game.reset()
        # Player 0 always opens.
        assert state[0][9] == 0
        for move in setup_moves:
            game.step(move)
        state, rewards, dones, infos = game.step(winning_move)
        assert rewards[0] == 1
        assert rewards[1] == -1