Ejemplo n.º 1
0
def play_versus_learner(strategy, value_map, verbose=True, explore=0.1,
                        alpha=0.5):
    """Play one game of ``strategy`` (player 1) against the learner (player 2).

    ``value_map`` is updated in place. After every learner move the value of
    the learner's previous position is moved towards the value of its new
    position. Positions that end the game are pinned to 0 (learner lost),
    0.5 (draw) or 1 (learner won).

    Args:
        strategy: callable ``strategy(player, state)`` returning the next
            state; it plays player 1's moves.
        value_map: dict mapping states to the learner's value estimates.
            States missing from the dict are treated as worth 0.5.
        verbose: when true, print the result of the game.
        explore: forwarded to ``player_move_learner`` (presumably the
            exploration rate -- confirm against that helper).
        alpha: learning rate of the value update.

    Returns:
        -1 if player 1 wins, 1 if player 2 (the learner) wins, 0 on a draw.
    """
    # Estimate used for any state the learner has not seen yet. The same
    # default must be used for every lookup in the update below; using the
    # exploration rate as the fallback would bias the first update of a state.
    default_value = 0.5
    state0 = (0, 0, 0, 0, 0, 0, 0, 0, 0)
    while True:
        state1 = strategy(1, state0)
        if is_winner(1, state1):
            # The learner's last position led to a loss.
            value_map[state0] = 0
            if verbose: print("\033[1;31;40m player 1 wins \n")
            return -1
        if is_finished(state1):
            value_map[state0] = 0.5
            if verbose: print("\033[1;33;40m draw \n")
            return 0
        # The fourth argument is passed through unchanged (presumably a
        # verbosity flag -- confirm against player_move_learner).
        state2 = player_move_learner(2, state1, value_map, False, explore)
        # Update after every player 2 move: pull the previous position's
        # value towards the value of the position just reached.
        previous_value = value_map.get(state0, default_value)
        value_map[state0] = previous_value + alpha * (
            value_map.get(state2, default_value) - previous_value)
        if is_winner(2, state2):
            value_map[state2] = 1
            if verbose: print("\033[1;34;40m player 2 wins \n")
            return 1
        if is_finished(state2):
            value_map[state2] = 0.5
            if verbose: print("\033[1;33;40m draw \n")
            return 0
        # The learner's new position becomes the "previous" one for the
        # next round.
        state0 = state2
Ejemplo n.º 2
0
def random_versus_never_lose(verbose=True):
    """Play one game: random mover (player 1) vs. never-lose mover (player 2).

    Args:
        verbose: when true, print the result of the game.

    Returns:
        -1 if player 1 wins, 1 if player 2 wins, 0 on a draw.
    """
    # One entry per turn within a round:
    # (player id, move function, banner printed on a win, value returned).
    turns = (
        (1, player_move_random, "\033[1;31;40m player 1 wins \n", -1),
        (2, player_move_never_lose, "\033[1;34;40m player 2 wins \n", 1),
    )
    board = (0,) * 9
    while True:
        for player, make_move, win_banner, win_result in turns:
            board = make_move(player, board)
            if is_winner(player, board):
                if verbose:
                    print(win_banner)
                return win_result
            if is_finished(board):
                if verbose:
                    print("\033[1;33;40m draw \n")
                return 0
Ejemplo n.º 3
0
def next_play(player, state, strategy, verbose=False):
    """Apply one move of ``strategy`` for ``player`` and classify the result.

    Args:
        player: id of the player to move.
        state: current state, passed to ``strategy``.
        strategy: callable ``strategy(player, state)`` returning the next
            state.
        verbose: when true, print the new state and any final result.

    Returns:
        A ``(result, next_state)`` pair:
        ``(player, None)`` if the move wins the game,
        ``(0, None)`` if the game is finished without a winner,
        ``(None, next_state)`` if the game goes on.
    """
    board = strategy(player, state)
    if verbose:
        print(state_str(board) + "\n-----")

    has_won = is_winner(player, board)
    # Game still open: hand the new state back to the caller.
    if not has_won and not is_finished(board):
        return (None, board)

    if verbose:
        if has_won:
            print("player %i wins" % player)
        else:
            print("draw")
    return (player if has_won else 0, None)
Ejemplo n.º 4
0
def two_learners(value_map_1, value_map_2, explore1, explore2, verbose=False,
                 alpha=0.5):
    """Play one game between two learners, updating both value maps in place.

    Player 1 uses ``value_map_1`` and player 2 uses ``value_map_2``. After
    each move, the mover's previous position is pulled towards the value of
    its new position in the mover's own map. When the game ends, each
    player's last position is pinned in that player's own map to 1 (that
    player won), 0 (that player lost) or 0.5 (draw).

    Args:
        value_map_1: dict of value estimates for player 1; missing states
            are treated as worth 0.5.
        value_map_2: dict of value estimates for player 2; missing states
            are treated as worth 0.5.
        explore1: forwarded to ``player_move_learner`` for player 1
            (presumably the exploration rate -- confirm against that helper).
        explore2: same, for player 2.
        verbose: when true, print the result of the game.
        alpha: learning rate of the value updates.

    Returns:
        1 if player 1 wins, -1 if player 2 wins, 0 on a draw.
    """
    # Estimate used for any state a learner has not seen yet. The same
    # default must be used for every lookup in an update; using the
    # exploration rate as the fallback would bias the first update of a state.
    default_value = 0.5
    state0 = (0, 0, 0, 0, 0, 0, 0, 0, 0)
    # The fourth argument is passed through unchanged (presumably a
    # verbosity flag -- confirm against player_move_learner).
    state1 = player_move_learner(1, state0, value_map_1, False, explore1)
    while True:
        state2 = player_move_learner(2, state1, value_map_2, False, explore2)
        # Update after every player 2 move: state0 is player 2's previous
        # position (the empty board on the first round).
        previous_value = value_map_2.get(state0, default_value)
        value_map_2[state0] = previous_value + alpha * (
            value_map_2.get(state2, default_value) - previous_value)
        if is_winner(2, state2):
            value_map_2[state2] = 1
            # Player 1's last position led to a loss.
            value_map_1[state1] = 0
            if verbose: print("\033[1;34;40m player 2 wins \n")
            return -1
        if is_finished(state2):
            value_map_2[state2] = 0.5
            value_map_1[state1] = 0.5
            if verbose: print("\033[1;33;40m draw \n")
            return 0

        state3 = player_move_learner(1, state2, value_map_1, False, explore1)
        # Update after every player 1 move: state1 is player 1's previous
        # position.
        previous_value = value_map_1.get(state1, default_value)
        value_map_1[state1] = previous_value + alpha * (
            value_map_1.get(state3, default_value) - previous_value)

        if is_winner(1, state3):
            value_map_1[state3] = 1
            # Player 2's last position led to a loss.
            value_map_2[state2] = 0
            if verbose: print("\033[1;31;40m player 1 wins \n")
            return 1
        if is_finished(state3):
            value_map_1[state3] = 0.5
            value_map_2[state2] = 0.5
            if verbose: print("\033[1;33;40m draw \n")
            return 0

        # Each player's new position becomes its "previous" one for the
        # next round.
        state0 = state2
        state1 = state3