Example #1
import numpy as np

def play():
    board = np.zeros((6, 7))
    backtrack = [[]]
    x = np.loadtxt("test(9).txt")  # saved search data (state statistics)
    w = 3   # 3 = game not finished (convention of the update function)
    l = 0   # l == 1 -> the last move was legal and the player switches
    pc = 0  # move counter used to alternate the players
    player = 1
    print(board)
    while w == 3:
        x = search5(x, board, player)
        # the data stores values from player 1's perspective,
        # so flip the sign while picking a suggestion for player 2
        if player == 2:
            x = change_value_sign(x)
        best = get_best_valued_child(x, board, player)
        print(s2D(best[0:best.size - 2]))
        if player == 2:
            x = change_value_sign(x)

        action = int(input("player " + str(player) + ", choose action 0-6:"))
        w, l, board = update(board, action, player)
        print("action taken: " + str(action) + " by player: " + str(player))
        if l == 1:
            pc += 1
            player = (pc % 2) + 1
        print(board)
Example #2
import numpy as np

def get_actions(state, player):
    #return all actions (columns) that are legal in the given state,
    #i.e. those for which update() reports l == 1
    actions = []
    state = s2D(state).copy()
    for i in range(action_space):
        if update(state, i, player)[1] == 1:
            actions.append(i)

    return np.array(actions)
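A quick, hypothetical illustration of the call (assuming a fresh 6x7 board, action_space = 7, that s2D accepts an already two-dimensional board, and that update() treats every column of an empty board as legal):

import numpy as np

empty_board = np.zeros((6, 7))
print(get_actions(empty_board, 1))  # expected: [0 1 2 3 4 5 6], every column is still open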
Example #3
import numpy as np

def collect_possible_childs(state, player, data):
    #collect every reachable child state of 'state' for 'player', together
    #with its visit count and value taken from the tree data
    state = s2D(state)
    x = get_actions(state, player)
    #one row per child: the flattened child state, then visit count, then value
    childs = np.zeros((x.size, state.size + 2))
    for i in range(x.size):
        next_state = update(state, x[i], player)[2]
        if not np.array_equal(next_state, state):
            childs[i, 0:state.size] = next_state.flatten()
            childs[i, childs.shape[1] - 2] = visitcount(data, next_state)
            childs[i, childs.shape[1] - 1] = value(data, next_state)
    return childs
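For reference, each returned row packs the flattened child state followed by its statistics; a minimal sketch of unpacking one row (assuming the 6x7 board from Example #1, so state.size == 42):

row = childs[0]                        # one child produced by collect_possible_childs
child_board = row[0:42].reshape(6, 7)  # columns 0..41: the flattened child state
visits = row[42]                       # second-to-last column: visit count
value_estimate = row[43]               # last column: value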
Example #4
from random import randint

def playout(state, player):
    #w=3 ... 3 means the game is not finished / kept in sync with the update function
    w = 3
    state = s2D(state)
    p = player
    while w == 3:
        #play random moves until the game ends
        c = randint(0, action_space - 1)
        w, l, state = update(state, c, p)
        if l == 1:
            p = (p % 2) + 1

    #the playout starts from the child of the leaf, i.e. with the player
    #opposite to the leaf's player ... if the winning player is the leaf's
    #player, the reward should be positive ... the player is switched twice
    #(1. in the call of this function, 2. after w changes, the while loop
    #switches it one last time), so if the winning player == p the reward is
    #positive

    # if player==p:
    #     return w
    #
    # return -w
    return w
Example #5
from random import randint
import numpy as np

def mcts(data, state, player, backtrack, rem):

    #selection: follow the best child according to the policy (reward/visits)
    #   until a state with one or more unvisited children is reached
    #expansion: pick one random unvisited child and add it to the tree with
    #   0 value and 0 visits
    #simulation: simulate from the newly created child to the end of the game
    #   with random actions and get the reward
    #backpropagation: update the stats of all nodes traveled down to the
    #   simulation ... all visit counts +1 / positive reward for all nodes of
    #   the winner's opponent, negative reward for all nodes of the winner
    #   (the positive reward goes to the states used to determine the best
    #   action for the winner ... which are the states in which the loser
    #   needs to choose an action)
    #track states while selecting and expanding so they are easy to update
    #   while backpropagating
    #print("state:")
    #print(state)
    #plan, assuming the input state is player 1's turn:
    #---select states as usual, but reverse the sign of the data whenever an
    #   action for player 2 has to be selected, so that player 2's best value
    #   (which is the lowest value in the main data) is picked as the highest

    #check whether this is the first recursion level / whether a backtrack already exists
    #first recursion: make the current state the backtrack array and reshape it so further states can be appended
    if rem == 0:
        backtrack = s1D(state).copy()
        backtrack = np.reshape(backtrack, (-1, backtrack.shape[0]))
        rem = 1
    #not the first recursion: append the current state to the backtrack array
    else:
        backtrack = addBacktrack(backtrack, state)
    #change the value sign ... player 2 wants to select its best states, which are the worst for player 1 (the lowest values in the main data)
    #print("backtrack:")
    #print(backtrack)
    if player == 2:
        data = change_value_sign(data)

    childs = collect_possible_childs(state, player, data)

    if player == 2:
        data = change_value_sign(data)

    #print("childs:")
    #print(childs)
    unvisited = check_if_leaf(state, player)
    #print("unvisited:")
    #print(unvisited)
    #reverse the sign only while selecting ... don't reverse it for backprop;
    #   there it only matters which player won, not which player started the backtrack
    #if the current state is a terminal state, no expansion/simulation is needed:
    #   get the value of the terminal state and backpropagate it

    if get_actions(state, player).size == 0:
        state = s2D(state)
        cpv = update(state, None, player)[0]

        data = backprop(backtrack, data, cpv)
        return data, backtrack, rem

    if unvisited.size == 0:

        # select next child and remember it for backpropagation
        next_state = get_best_child(childs)
        #print("next_state")
        #print(next_state)
        data, backtrack, rem = mcts(data, next_state, (player % 2) + 1,
                                    backtrack, rem)
        return data, backtrack, rem
    else:

        sel_child = childs[int(unvisited[randint(0, unvisited.size - 1)]),
                           0:childs.shape[1] - 2]
        sel_child = np.reshape(sel_child, (-1, dimensionx))
        #print("sel_child:")
        #print(sel_child)
        data = addState(data, sel_child)
        backtrack = addBacktrack(backtrack, sel_child)
        #print("new backtrack:")
        #print(backtrack)
        #cpv=current playout value
        cpv = playout(sel_child, (player % 2) + 1)

        #print("cpv:")
        #print(cpv)
        #print("old data:")
        #print(data)
        data = backprop(backtrack, data, cpv)
        #print("new data after backprop:")
        #print(data)
    #(the value sign was already changed back above, so the data is stored from player 1's perspective again)

    return data, backtrack, rem
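Example #1 relies on search5, which is not part of this listing; a minimal, hypothetical driver in the same spirit (assuming mcts keeps the signature above and that each call performs one selection/expansion/simulation/backpropagation pass from the root) could look like this:

def run_search(data, board, player, iterations=500):
    # repeat single MCTS passes from the current root state;
    # each pass starts a fresh backtrack (rem = 0), so the previous
    # backtrack is discarded and only the updated data is kept
    for _ in range(iterations):
        data, _, _ = mcts(data, board, player, None, 0)
    return data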