Example 1
def test_Game_update_won(state, expected):
    # arrange
    game = Game()
    game.state = np.reshape(state, game.board_shape)

    # act
    game._update_won()

    # assert
    assert game.won == expected
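The `state` and `expected` arguments imply a `pytest.mark.parametrize` decorator that the excerpt omits. A minimal sketch of what it might look like; the concrete cases are assumptions (0 as the "no winner" value of `game.won` is inferred from Example 18, where `game.won == 0` is plotted as a tie):

import pytest

# hypothetical cases; sits directly above test_Game_update_won
@pytest.mark.parametrize('state, expected', [
    ((1, 1, 1, -1, -1, 0, 0, 0, 0), 1),    # top row completed by marker 1
    ((-1, 0, 1, -1, 1, 0, -1, 0, 0), -1),  # left column completed by marker -1
    ((0, 0, 0, 0, 0, 0, 0, 0, 0), 0),      # empty board: no winner
])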
Example 2
def test_Game_determine_reward(won, expected):
    # arrange
    game = Game()
    game.won = won
    marker = 1

    # act
    reward = game.determine_reward(marker)

    # assert
    assert reward == expected
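`won` and `expected` likewise come from a parametrize decorator outside the excerpt. Given `marker = 1` here and the reward values used in Examples 14 and 16 (-1 for a loss, 0 for no reward), the cases are presumably close to:

import pytest

# hypothetical cases; sits directly above test_Game_determine_reward
@pytest.mark.parametrize('won, expected', [
    (1, 1),    # own marker won
    (-1, -1),  # opponent won
    (0, 0),    # tie or game still in progress
])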
Example 3
def test_NeuralPlayer_policy(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    marker = 1
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    game.state = state

    move_values = agent._policy(marker, game)

    assert isinstance(move_values, list)
Example 4
def test_NeuralPlayer_play(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    marker = 1
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    game.state = state
    actions = state_to_actions(tuple(state.flatten()), game.ind_to_loc,
                               game.empty_marker)

    loc = agent.play(marker, game)

    assert isinstance(loc, tuple)
    assert loc in actions
Example 5
def test_Game_mark(loc, marker, expected):
    # arrange
    game = Game()
    prev_turn = 1
    game.turn = prev_turn
    game.state[1, 1] = -1
    prev_mark = game.state[loc[0], loc[1]]

    # act
    valid, _ = game.mark(loc, marker)
    expected_turn = int(marker * -1) if valid else prev_turn
    expected_mark = marker if valid else prev_mark

    # assert
    assert valid == expected
    assert game.turn == expected_turn
    assert game.state[loc[0], loc[1]] == expected_mark
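`loc`, `marker`, and `expected` are parametrized as well. Since the arrange step pre-marks `game.state[1, 1] = -1` and sets `game.turn = 1`, plausible cases would be (assumed, not from the source):

import pytest

# hypothetical cases; sits directly above test_Game_mark
@pytest.mark.parametrize('loc, marker, expected', [
    ((0, 0), 1, True),   # empty square on the marker's turn: accepted
    ((1, 1), 1, False),  # square already holds -1: rejected
])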
Example 6
def test_NeuralPlayer_equivalent_states_to_reward(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)

    equiv_states, equiv_transforms = agent._equivalent_states_to_reward(state)

    assert len(equiv_states) == len(equiv_transforms)
Example 7
def test_NeuralPlayer_adjust_state_for_marker(net, marker, expected):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    expected_mod = np.reshape(expected, game.board_shape)

    state_mod = agent._adjust_state_for_marker(state, marker)

    assert (state_mod == expected_mod).all()
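A guess at the missing parametrization: `_adjust_state_for_marker` presumably normalizes the board so the acting marker is always 1, mirroring the marker-swap symmetry used in `initialize_value_map` (Example 17). Both cases below are assumptions:

import pytest

# hypothetical cases; sits directly above test_NeuralPlayer_adjust_state_for_marker
@pytest.mark.parametrize('marker, expected', [
    (1, (0, 1, -1, 0, 1, 0, -1, 0, 0)),   # marker 1: state unchanged
    (-1, (0, -1, 1, 0, -1, 0, 1, 0, 0)),  # marker -1: all markers flipped
])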
Example 8
def test_NeuralPlayer_state_values(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    expected_len = game.board_shape[0] * game.board_shape[1]

    values = agent._state_values(state)

    assert isinstance(values, torch.Tensor)
    assert len(values) == expected_len
Example 9
def test_TablePlayer_play(value_map):
    # arrange
    player = TablePlayer(value_map)
    marker = 1
    game = Game()

    # act
    loc = player.play(marker, game)

    # assert
    assert isinstance(loc, tuple)
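The `value_map` argument is presumably a pytest fixture. Given `initialize_value_map` and the `INITIAL_VALUE` constant from Examples 17 and 18, it might be built like this (a sketch, not necessarily the project's actual fixture):

import pytest

@pytest.fixture(scope='module')  # scope is a guess; the full map is expensive to build
def value_map():
    return initialize_value_map(INITIAL_VALUE)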
Example 10
def test_NeuralPlayer_reward_move(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    marker = 1
    move = (2, 1)
    reward = 1
    temp_disc = 1

    reward_mods = agent._reward_move(state, marker, move, reward, temp_disc,
                                     game.ind_to_loc)

    assert isinstance(reward_mods, list)
Example 11
def test_one_hot_state():
    game = Game()
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    marker_order = [-1, 0, 1]
    expected_size = len(marker_order) * state.size
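    # expected layout: three 9-element indicator planes, one per marker in marker_order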
    expected_ohe = np.array([
        0, 0, 1, 0, 0, 0, 1, 0, 0,
        1, 0, 0, 1, 0, 1, 0, 1, 1,
        0, 1, 0, 0, 1, 0, 0, 0, 0,
    ], dtype=np.int8)

    ohe = one_hot_state(state, marker_order)

    assert ohe.size == expected_size
    assert (ohe == expected_ohe).all()
Example 12
def test_NeuralPlayer_process_state_reward(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    state = np.reshape((0, 1, -1, 0, 1, 0, -1, 0, 0), game.board_shape)
    transform = {'func': None, 'args': {}}
    move = (2, 1)
    reward = 1
    temp_disc = 1
    equiv = False

    mod = agent._process_state_reward(state, transform, move, reward,
                                      temp_disc, equiv, game.ind_to_loc)

    assert isinstance(mod, ValueMod)
Example 13
def test_NeuralPlayer_update_value_with_reward(net, value, reward):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    temp_disc = 0.5

    updated = agent._update_value_with_reward(value, reward, lr, temp_disc)

    assert updated >= 0
    assert updated <= 1
    if reward == 0:
        assert updated == value
    elif reward > 0:
        assert updated > value
    elif reward < 0:
        assert updated < value
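The asserts already spell out the expected behavior per reward sign, so the parametrization presumably covers at least one case of each (values assumed):

import pytest

# hypothetical cases; sits directly above test_NeuralPlayer_update_value_with_reward
@pytest.mark.parametrize('value, reward', [
    (0.5, 0),   # no reward: value unchanged
    (0.5, 1),   # positive reward: value moves up
    (0.5, -1),  # negative reward: value moves down
])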
Example 14
def test_NeuralPlayer_process_reward_lose(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    marker = 1
    agent.buffer = [
        MoveRecord(state=np.reshape((0, -1, -1, 0, 0, 1, 0, 0, 1),
                                    game.board_shape),
                   move=(1, 1),
                   marker=marker)
    ]
    reward = -1

    reward_mods = agent.process_reward(reward, game.ind_to_loc)

    assert len(agent.reward_record) > 0
Example 15
def test_NeuralPlayer_calc_target_values(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    n_vals = game.board_shape[0] * game.board_shape[1]
    rand_vals = list(np.random.rand(n_vals, 1))
    values = torch.tensor(rand_vals, dtype=float)
    move_ind = 5
    valid_inds = [1, 2, 3]
    current = values[move_ind].item()
    updated = current * 1.1

    targets = agent._calc_target_values(values, current, updated, move_ind,
                                        valid_inds)

    assert np.isclose(torch.sum(targets).item(), 1)
Example 16
def test_NeuralPlayer_process_reward_no_reward(net):
    game = Game()
    lr = 0.25
    agent = NeuralPlayer(net, lr)
    marker = 1
    agent.buffer = [
        MoveRecord(state=np.reshape((0, -1, -1, 0, 0, 1, 0, 0, 1),
                                    game.board_shape),
                   move=(0, 0),
                   marker=marker),
        MoveRecord(state=np.reshape((1, -1, -1, 0, 0, 1, -1, 0, 1),
                                    game.board_shape),
                   move=(2, 1),
                   marker=marker)
    ]
    reward = 0
    expected_mods = []

    reward_mods = agent.process_reward(reward, game.ind_to_loc)

    assert reward_mods == expected_mods
Example 17
def initialize_value_map(init_val: float) -> dict:
    """Initialize a value map.

    Args:
        init_val: float, initial value

    Returns:
        init_value_map: dict, value map
    """

    prod_combs = product(Game.valid_markers + [Game.empty_marker],
                         repeat=Game.board_shape[0]**2)
    valid_combs = [pc for pc in prod_combs if abs(sum(pc)) < 2]

    non_dupes = []
    for vc in valid_combs:
        swap = tuple([elem * -1 for elem in vc])
        if swap not in non_dupes:
            non_dupes.append(vc)

    combs = []
    for nd in non_dupes:
        c_box = np.reshape(nd, Game.board_shape)
        rot90 = np.rot90(c_box)
        if tuple(rot90.flatten()) in combs:
            continue
        rot180 = np.rot90(c_box, k=2)
        if tuple(rot180.flatten()) in combs:
            continue
        rot270 = np.rot90(c_box, k=3)
        if tuple(rot270.flatten()) in combs:
            continue
        lr = np.fliplr(c_box)
        if tuple(lr.flatten()) in combs:
            continue
        ud = np.flipud(c_box)
        if tuple(ud.flatten()) in combs:
            continue
        combs.append(nd)

    # a board can't contain more than one winning line; _update_won raises
    # ValueError for such impossible states, so exclude them
    states = []
    for c in combs:
        game = Game()
        game.state = np.reshape(c, Game.board_shape)
        try:
            game._update_won()
            states.append(c)
        except ValueError:
            pass

    init_value_map = {
        s: {
            m: {
                a: init_val
                for a in state_to_actions(s, Game.ind_to_loc,
                                          Game.empty_marker)
            }
            for m in [-1, 1]
        }
        for s in states
    }

    for s in init_value_map:
        game = Game()
        game.state = np.reshape(s, game.board_shape)
        game._update_won()
        for m in init_value_map[s]:
            # won state: no actions, just reward value
            if game.won in game.valid_markers:
                init_value_map[s][m] = 1 if m == game.won else 0
            # full board: no actions, just initial value
            elif len(init_value_map[s][m]) == 0:
                init_value_map[s][m] = init_val
            # cannot be marker's turn: no actions
            # NOTE: I don't explicitly reverse transform a marker swap
            #       so can't assume markers will match
            # elif sum(s) == m:
            #     init_value_map[s][m] = {}

    return init_value_map
Example 18
                           previous=current,
                           new=updated)
            reward_mods.append(mod)

        return reward_mods


if __name__ == '__main__':
    init_value_map = initialize_value_map(INITIAL_VALUE)
    agent = TablePlayer(init_value_map)
    competitor = TablePlayer(init_value_map)

    # train against a player who is learning how to beat you
    trains = []
    for _ in range(100000):
        game = Game()
        play_game(game, agent, competitor)
        trains.append(game.won)

    trains_mv = moving_value_frequencies(trains)
    plot_outcome_frequencies(trains_mv,
                             order=[1, 0, -1],
                             labels=['Agent Wins', 'Tie', 'Competitor Wins'])

    # test against a random player to see how much we've learned
    agent.explore = False
    agent.learning_rate = 0
    rando = TablePlayer(init_value_map)
    rando.learning_rate = 0

    tests = []
Example 19
@pytest.fixture  # decorator assumed: `net` is consumed as a fixture by the tests above
def net():
    return linear_net(Game())
Example 20
            mods = self._reward_move(entry.state, entry.marker, entry.move,
                                     reward, temporal_discount, ind_to_loc)
            reward_mods.extend(mods)
            temporal_discount *= self.temporal_discount_rate

        self.reward_record = reward_mods


if __name__ == '__main__':
    lr = 0.25
    nn_lr = 1e-3
    temp_rate = 0.8
    layers = [2, 1, 0.5]  # [1]
    drop_prob = 0.0  # 0.05

    agent = NeuralPlayer(linear_net(Game(),
                                    hidden_layers=layers,
                                    drop_prob=drop_prob),
                         lr=lr,
                         temp_rate=temp_rate,
                         nn_lr=nn_lr)
    competitor = NeuralPlayer(linear_net(Game(),
                                         hidden_layers=layers,
                                         drop_prob=drop_prob),
                              lr=lr,
                              temp_rate=temp_rate,
                              nn_lr=nn_lr)
    rando = RandomPlayer()

    n = 50000
    outcomes = []