コード例 #1
0
def make_tictactoe(dim=1):
    """Register the tic-tac-toe environment and report its spaces.

    A throwaway ``MultiAgentTicTacToe(dim)`` instance is built only to read
    its observation and action spaces; the registered creator builds a fresh
    environment with the same ``dim`` every time it is invoked.

    Args:
        dim: Value forwarded to the ``MultiAgentTicTacToe`` constructor.

    Returns:
        A ``(observation_space, action_space, env_name)`` tuple, where
        ``env_name`` is the string the environment was registered under.
    """
    env_name = "tictactoe-v0"
    probe_env = MultiAgentTicTacToe(dim)

    def _create_env(_config):
        # The creator's argument is ignored; only ``dim`` matters.
        return MultiAgentTicTacToe(dim)

    register_env(env_name, _create_env)

    return probe_env.observation_space, probe_env.action_space, env_name
コード例 #2
0
def test_not_move_on_turn_hd():
    """Both players submit position 9 on player 0's turn (constructor arg 8).

    Expects player 0 to be rewarded -1 and player 1 to be rewarded +1.
    NOTE(review): position 9 is presumably the "no move" action -- confirm
    against the environment definition.
    """
    game = MultiAgentTicTacToe(8)
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    joint_action = {0: [9, 2], 1: [9, 6]}
    _, rewards, _, _ = game.step(joint_action)

    assert rewards[0] == -1
    assert rewards[1] == 1
コード例 #3
0
def test_out_of_turn_over_not_move():
    """Player 1 places a piece on player 0's turn while player 0 submits 9.

    Expects player 0 to be rewarded +1 and player 1 to be rewarded -1.
    NOTE(review): position 9 is presumably the "no move" action -- confirm
    against the environment definition.
    """
    game = MultiAgentTicTacToe()
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    joint_action = {0: [9, 0], 1: [3, 0]}
    _, rewards, _, _ = game.step(joint_action)

    assert rewards[0] == 1
    assert rewards[1] == -1
コード例 #4
0
def test_starting_state_hd():
    """Check the observation returned by ``reset()`` (constructor arg 6).

    Each of the two players must see a length-10 observation whose first
    nine entries (the board) are 0 and whose last entry (the turn) is 0.
    """
    game = MultiAgentTicTacToe(6)
    initial = game.reset()

    # The observation is keyed by player index.
    for player in (0, 1):
        view = initial[player]
        assert len(view) == 10

        # Every board cell reads as empty for this player.
        for cell in range(9):
            assert view[cell] == 0

        # Final slot is the turn marker.
        assert view[9] == 0
コード例 #5
0
def test_place_high_dimension():
    """Place one piece per player and check both players' observations.

    With constructor argument ``k`` (2 here), the test expects player 0's
    piece to show up as ``1 * k`` and player 1's piece as ``2 * k`` in the
    board part of the observation, and the turn marker in slot 9 to
    alternate 0 -> 1 -> 0.
    """
    k = 2
    game = MultiAgentTicTacToe(k)
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    def _check(observations, occupied, turn):
        # ``occupied`` maps board cell -> expected value; all other cells
        # must be 0. Both players are expected to see the same thing.
        for player in (0, 1):
            view = observations[player]
            assert len(view) == 10

            for cell in range(9):
                assert view[cell] == occupied.get(cell, 0)

            # Final slot is the turn marker.
            assert view[9] == turn

    # Player 0 takes cell 4 while player 1 submits position 9.
    after_first, _, _, _ = game.step({0: [4, k - 1], 1: [9, k - 1]})
    _check(after_first, {4: 1 * k}, 1)

    # Player 1 takes cell 5 while player 0 submits position 9.
    after_second, _, _, _ = game.step({0: [9, k - 1], 1: [5, k - 1]})
    _check(after_second, {4: 1 * k, 5: 2 * k}, 0)
コード例 #6
0
def simulate_rollouts_tictactoe(trainers, dim, num_rollouts=20):
    """Play several games with the given trainers and print the outcomes.

    For every step, the trainer at position ``p`` of ``trainers`` is asked
    for player ``p``'s action via ``compute_action`` (using the policy id
    from ``policy_mapping_fn(p)``). The joint action is printed, applied to
    the environment, and the environment is rendered. When a game finishes,
    player 0's final reward is tallied; the tally is printed at the end.

    Args:
        trainers: Iterable of trainer objects exposing ``compute_action``;
            enumeration order determines the player index.
        dim: Value forwarded to the ``MultiAgentTicTacToe`` constructor.
        num_rollouts: Number of games to play. Defaults to 20, the value
            that used to be hard-coded.

    Raises:
        KeyError: If player 0's final reward is not one of -1, 0 or 1.
    """
    env = MultiAgentTicTacToe(dim)

    # Count of games per final reward of player 0.
    total_score = {-1: 0, 0: 0, 1: 0}

    for rollout in range(num_rollouts):
        obs = env.reset()

        print(f"--Rollout{rollout}--")
        print(f"First Player: {obs[0][9]}")
        done = False
        while not done:
            # A distinct name for the player index avoids shadowing the
            # rollout counter of the enclosing loop.
            actions = {
                player: trainer.compute_action(
                    obs[player], policy_id=policy_mapping_fn(player))
                for player, trainer in enumerate(trainers)
            }

            # Each action component comes back wrapped in a sequence; keep
            # only its first element.
            # TODO: figure out why this unwrapping became necessary.
            actions = {
                player: [component[0] for component in action]
                for player, action in actions.items()
            }

            print(actions)
            obs, rewards, dones, infos = env.step(actions)

            done = dones['__all__']

            if done:
                total_score[rewards[0]] += 1

            env.render()

    print(f"player 0 outcomes: {total_score}")
コード例 #7
0
def test_tie_game_hd():
    """Play a scripted five-step game (constructor arg 8) and check rewards.

    Player 0 places on cells 2, 4 and 6 while player 1 places on cells 1
    and 3; the test then expects +1 for player 0 and -1 for player 1.
    NOTE(review): despite the "tie" in the name, the assertions describe a
    player-0 win -- confirm the intended scenario.
    """
    game = MultiAgentTicTacToe(8)
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    lead_in = [
        {0: [2, 4], 1: [9, 2]},
        {0: [9, 7], 1: [1, 3]},
        {0: [4, 4], 1: [9, 4]},
        {0: [9, 6], 1: [3, 0]},
    ]
    for joint_action in lead_in:
        game.step(joint_action)

    _, rewards, _, _ = game.step({0: [6, 7], 1: [9, 0]})

    assert rewards[0] == 1
    assert rewards[1] == -1
コード例 #8
0
def test_place_piece_on_other_piece():
    """Player 1 targets the cell player 0 already took.

    Player 0 first places on cell 4; on the next step player 1 also submits
    cell 4. The test expects +1 for player 0 and -1 for player 1.
    """
    game = MultiAgentTicTacToe()
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    # Player 0 claims cell 4.
    game.step({0: [4, 0], 1: [9, 0]})

    # Player 1 now submits the same cell.
    clash = {0: [9, 0], 1: [4, 0]}
    _, rewards, _, _ = game.step(clash)

    assert rewards[0] == 1
    assert rewards[1] == -1
コード例 #9
0
def test_out_of_turn_over_another_peice_hd():
    """Both players place on the second step (constructor arg 4).

    After player 0 takes cell 4, player 0 submits cell 5 while player 1
    submits the already-taken cell 4. The test expects -1 for player 0 and
    +1 for player 1.
    """
    game = MultiAgentTicTacToe(4)
    start_obs = game.reset()

    # Slot 9 of the observation carries the turn marker; player 0 opens.
    assert start_obs[0][9] == 0

    # Player 0 claims cell 4.
    game.step({0: [4, 3], 1: [9, 3]})

    # Both players submit a placement in the same step.
    clash = {0: [5, 3], 1: [4, 3]}
    _, rewards, _, _ = game.step(clash)

    assert rewards[0] == -1
    assert rewards[1] == 1
コード例 #10
0
def test_diagonal_wins_hd():
    """Player 0 completes cells 0-4-8, then 2-4-6 (constructor arg 20).

    Each scripted game runs on a fresh environment; after the last step the
    test expects +1 for player 0 and -1 for player 1.
    """
    scripted_games = (
        # Player 0 on cells 0, 4, 8.
        [
            {0: [0, 3], 1: [9, 3]},
            {0: [9, 5], 1: [1, 0]},
            {0: [4, 0], 1: [9, 3]},
            {0: [9, 6], 1: [3, 2]},
            {0: [8, 6], 1: [9, 3]},
        ],
        # Player 0 on cells 2, 4, 6.
        [
            {0: [2, 6], 1: [9, 5]},
            {0: [9, 2], 1: [1, 8]},
            {0: [4, 3], 1: [9, 3]},
            {0: [9, 7], 1: [3, 4]},
            {0: [6, 2], 1: [9, 7]},
        ],
    )

    for script in scripted_games:
        game = MultiAgentTicTacToe(20)
        start_obs = game.reset()

        # Slot 9 of the observation carries the turn marker; player 0 opens.
        assert start_obs[0][9] == 0

        *lead_in, finishing_move = script
        for joint_action in lead_in:
            game.step(joint_action)

        _, rewards, _, _ = game.step(finishing_move)

        assert rewards[0] == 1
        assert rewards[1] == -1
コード例 #11
0
def test_horizontal_wins_hd():
    """Player 0 completes cells 0-1-2, 3-4-5 and 6-7-8 (constructor arg 10).

    Each scripted game runs on a fresh environment; after the last step the
    test expects +1 for player 0 and -1 for player 1.
    """
    scripted_games = (
        # Player 0 on cells 0, 1, 2.
        [
            {0: [0, 9], 1: [9, 9]},
            {0: [9, 3], 1: [4, 9]},
            {0: [1, 4], 1: [9, 9]},
            {0: [9, 2], 1: [5, 9]},
            {0: [2, 4], 1: [9, 2]},
        ],
        # Player 0 on cells 3, 4, 5.
        [
            {0: [3, 5], 1: [9, 4]},
            {0: [9, 7], 1: [0, 7]},
            {0: [4, 2], 1: [9, 8]},
            {0: [9, 4], 1: [1, 2]},
            {0: [5, 4], 1: [9, 0]},
        ],
        # Player 0 on cells 6, 7, 8.
        [
            {0: [6, 4], 1: [9, 6]},
            {0: [9, 0], 1: [0, 3]},
            {0: [7, 6], 1: [9, 2]},
            {0: [9, 2], 1: [1, 3]},
            {0: [8, 2], 1: [9, 5]},
        ],
    )

    for script in scripted_games:
        game = MultiAgentTicTacToe(10)
        start_obs = game.reset()

        # Slot 9 of the observation carries the turn marker; player 0 opens.
        assert start_obs[0][9] == 0

        *lead_in, finishing_move = script
        for joint_action in lead_in:
            game.step(joint_action)

        _, rewards, _, _ = game.step(finishing_move)

        assert rewards[0] == 1
        assert rewards[1] == -1
コード例 #12
0
def test_vertical_wins_hd():
    """Player 0 completes cells 0-3-6, 1-4-7 and 2-5-8 (constructor arg 5).

    Each scripted game runs on a fresh environment; after the last step the
    test expects +1 for player 0 and -1 for player 1.
    """
    scripted_games = (
        # Player 0 on cells 0, 3, 6.
        [
            {0: [0, 4], 1: [9, 0]},
            {0: [9, 2], 1: [4, 2]},
            {0: [3, 4], 1: [9, 3]},
            {0: [9, 1], 1: [5, 0]},
            {0: [6, 2], 1: [9, 4]},
        ],
        # Player 0 on cells 1, 4, 7.
        [
            {0: [1, 4], 1: [9, 0]},
            {0: [9, 0], 1: [3, 3]},
            {0: [4, 1], 1: [9, 3]},
            {0: [9, 0], 1: [5, 0]},
            {0: [7, 3], 1: [9, 4]},
        ],
        # Player 0 on cells 2, 5, 8.
        [
            {0: [2, 1], 1: [9, 2]},
            {0: [9, 0], 1: [3, 0]},
            {0: [5, 3], 1: [9, 5]},
            {0: [9, 0], 1: [4, 0]},
            {0: [8, 3], 1: [9, 3]},
        ],
    )

    for script in scripted_games:
        game = MultiAgentTicTacToe(5)
        start_obs = game.reset()

        # Slot 9 of the observation carries the turn marker; player 0 opens.
        assert start_obs[0][9] == 0

        *lead_in, finishing_move = script
        for joint_action in lead_in:
            game.step(joint_action)

        _, rewards, _, _ = game.step(finishing_move)

        assert rewards[0] == 1
        assert rewards[1] == -1
コード例 #13
0
def test_diagonal_wins():
    """Player 0 completes cells 0-4-8, then 2-4-6 on a default environment.

    Each scripted game runs on a fresh environment; after the last step the
    test expects +1 for player 0 and -1 for player 1.
    """
    scripted_games = (
        # Player 0 on cells 0, 4, 8.
        [
            {0: [0, 0], 1: [9, 0]},
            {0: [9, 0], 1: [1, 0]},
            {0: [4, 0], 1: [9, 0]},
            {0: [9, 0], 1: [3, 0]},
            {0: [8, 0], 1: [9, 0]},
        ],
        # Player 0 on cells 2, 4, 6.
        [
            {0: [2, 0], 1: [9, 0]},
            {0: [9, 0], 1: [1, 0]},
            {0: [4, 0], 1: [9, 0]},
            {0: [9, 0], 1: [3, 0]},
            {0: [6, 0], 1: [9, 0]},
        ],
    )

    for script in scripted_games:
        game = MultiAgentTicTacToe()
        start_obs = game.reset()

        # Slot 9 of the observation carries the turn marker; player 0 opens.
        assert start_obs[0][9] == 0

        *lead_in, finishing_move = script
        for joint_action in lead_in:
            game.step(joint_action)

        _, rewards, _, _ = game.step(finishing_move)

        assert rewards[0] == 1
        assert rewards[1] == -1