def action(board, dice, oplayer, i = 0):

    flippedplayer = -1
    if (flippedplayer == oplayer): # view it from player 1 perspective
        board = flipped_agent.flip_board(np.copy(board))
        player = -oplayer # player now the other player +1
    else:
        player = oplayer
    possible_moves, possible_boards = Backgammon.legal_moves(board, dice, player)
    na = len(possible_boards)
    if (na == 0):
        return []
    xa = np.zeros((na, nx+1))
    for j in range(0, na):
        xa[j,:] = one_hot_encoding(possible_boards[j],i)
    x = Variable(torch.tensor(xa.transpose(), dtype = torch.float, device = device))
    # now do a forward pass to evaluate the board's after-state value
    h = torch.mm(w1,x) + b1 # matrix-multiply x with input weight w1 and add bias
    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
    va = y.sigmoid().detach().cpu()
    action = possible_moves[np.argmax(va)]
    if (flippedplayer == oplayer): # map this move to right view
        action = flipped_agent.flip_move(action)
    return action
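These agents rely on a one_hot_encoding helper that none of the snippets define. Below is a minimal sketch of what such an encoding could look like, assuming the 28 x 2 x 6 = 336-feature layout implied by the training example further down (it views the encoded board as a 28*2*6 column vector); the example above passes an extra argument i and fills nx+1 features, so the original helper's exact layout may well differ.

import numpy as np

def one_hot_encoding(board, i=0):
    # Assumed sketch: for each of the 28 board positions and each player,
    # a 6-way one-hot of the checker count (6 or more checkers share the last slot).
    # The argument i is accepted for call compatibility but ignored here.
    features = np.zeros((28, 2, 6))
    for pos in range(28):
        n = int(board[pos])
        if n > 0:    # player +1 checkers on this point
            features[pos, 0, min(n, 6) - 1] = 1
        elif n < 0:  # player -1 checkers on this point
            features[pos, 1, min(-n, 6) - 1] = 1
    return features.flatten()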
Example #2
 def legal_moves(self, board, dice, player):
     if player == -1:
         board = FA.flip_board(np.copy(board))
     moves, boards = B.legal_moves(board=board, dice=dice, player=1)
     if len(boards) == 0:
         return [], []
     boards = np.vstack(boards)
     return moves, boards
Example #3
def action(board_copy, dice, player, i):
    
    if player == -1:
        board_copy = FA.flip_board(np.copy(board_copy))
    possible_moves, possible_boards = B.legal_moves(board_copy, dice, 1)
    
    if len(possible_moves) == 0:
        return []
    
    action = AgentJ.sample_action(np.vstack(possible_boards))
    move = possible_moves[action]
    if player == -1:
        move = FA.flip_move(move)
    return move
Example #4
def action(net, board_copy, dice, player, i):

    if player == -1:
        board_copy = flipped_agent.flip_board(board_copy)  # flip the board into player 1's view
    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy, dice, player=1)

    if len(possible_moves) == 0:
        return []
    # evaluate each after-state with the network and play the greedy move
    # (the original selection step is missing here; a generic network forward pass is assumed)
    values = [float(net(torch.tensor(b, dtype=torch.float))) for b in possible_boards]
    move = possible_moves[int(np.argmax(values))]

    if player == -1:
        move = flipped_agent.flip_move(move)  # flip the move back
    return move
Example #5
def action(board, dice, oplayer):
    flippedplayer = -1
    if (flippedplayer == oplayer):  # view it from player 1 perspective
        board = flipped_agent.flip_board(np.copy(board))
        player = -oplayer  # player now the other player +1
    else:
        player = oplayer
    possible_moves, possible_boards = e_legal_moves(board, dice, 1)
    if len(possible_moves) == 0:
        return []
    #index = get_action(actor, possible_boards)
    index = epsilon_greedy(critic, possible_boards)
    action = possible_moves[index]
    #print("ACTION")
    #print(action)
    if (flippedplayer == oplayer):  # map this move to right view
        action = flipped_agent.flip_move(action)
    return action
Example #6
def action(board, dice, oplayer, nRoll = 0):
    flipped_player = -1
    if (flipped_player == oplayer):
        board = flipped_agent.flip_board(np.copy(board))
        player = -flipped_player
    else:
        player = oplayer
    # check out the legal moves available for the throw
    race = c_int(israce(board))
    possible_moves, possible_boards = Backgammon.legal_moves(board, dice, player)
    na = len(possible_moves)
    va = np.zeros(na)
    if (na == 0):
        return []
    for i in range(0, na):
        board = pubeval_flip(possible_boards[i])
        board = board.astype(dtype = ctypes.c_int)
        va[i] = lib.pubeval(race, board.ctypes.data_as(intp))
    action = possible_moves[np.argmax(va)]
    if (flipped_player == oplayer): # map this move to right view
        action = flipped_agent.flip_move(action)
    return action
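The pubeval example above assumes the classic pubeval C evaluator has already been loaded with ctypes and that lib, intp, israce and pubeval_flip exist at module level. A minimal, assumed setup for the ctypes part (library path and result type are placeholders, not taken from the source) could be:

import ctypes
from ctypes import c_int

lib = ctypes.CDLL("./pubeval.so")        # compiled pubeval shared library (assumed path)
intp = ctypes.POINTER(ctypes.c_int)      # pointer type handed to lib.pubeval
lib.pubeval.argtypes = [c_int, intp]     # (race flag, board array)
lib.pubeval.restype = ctypes.c_float     # pubeval returns a scalar position score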
Example #7
def action(board, dice, oplayer, i=0):

    flippedplayer = -1
    if (flippedplayer == oplayer):  # view it from player 1 perspective
        board = flipped_agent.flip_board(np.copy(board))
        player = -oplayer  # player now the other player +1
    else:
        player = oplayer

    possible_moves, possible_boards = Backgammon.legal_moves(
        board, dice, player)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    after_state, action = epsilon_nn_greedy(board, possible_moves,
                                            possible_boards, player)

    if (flippedplayer == oplayer):  # map this move to right view
        action = flipped_agent.flip_move(action)
    return action
Example #8
 def swap_player(self):
     self.board = FA.flip_board(board_copy=np.copy(self.board))
Example #9
def learnit(numgames, epsilon, lam, alpha, alpha1, alpha2, w1, b1, w2, b2):
    gamma = 1 # for completeness
    # play numgames games for training
    for games in range(0, numgames):
        board = Backgammon.init_board()    # initialize the board (empty)
        # now we initialize all the eligibility traces for the neural network
        Z_w1 = torch.zeros(w1.size(), device = device, dtype = torch.float)
        Z_b1 = torch.zeros(b1.size(), device = device, dtype = torch.float)
        Z_w2 = torch.zeros(w2.size(), device = device, dtype = torch.float)
        Z_b2 = torch.zeros(b2.size(), device = device, dtype = torch.float)
        # player to start is "1" the other player is "-1"
        player = 1
        otherplayer = -1
        winner = 0 # this implies a draw
        isGameOver = False
        moveNumber = 0
        while (isGameOver == False):
            dice = Backgammon.roll_dice()
            # use a policy to find action
            # both are using the neural-network to approximate the after-state value
            if (player == otherplayer): # this player flips the board to find an action
                possible_moves, possible_boards = Backgammon.legal_moves(flipped_agent.flip_board(np.copy(board)), dice, -player)
                action = epsilon_nn_greedy(flipped_agent.flip_board(np.copy(board)), dice, -player, epsilon, w1, b1, w2, b2,  possible_moves, possible_boards, False)
                action = flipped_agent.flip_move(action)
            else: # this one uses the original board.
                possible_moves, possible_boards = Backgammon.legal_moves(board, dice, player)
                action = epsilon_nn_greedy(np.copy(board), dice, player, epsilon, w1, b1, w2, b2, possible_moves, possible_boards, False)
            # perform move and update board
            for i in range(0,len(action)):
                board = Backgammon.update_board(board, action[i], player)
            if (1 == Backgammon.game_over(board)): # has this player won?
                winner = player
                isGameOver = True
                break # bail out of inner game loop
            # once both players have performed at least one move we can start doing updates
            if (1 < moveNumber):
                if otherplayer == player: # here player -1 updates its value estimate using the flipped board
                    x_flipped = Variable(torch.tensor(one_hot_encoding(flipped_agent.flip_board(board)), dtype = torch.float, device = device)).view(28*2*6,1)
                    h = torch.mm(w1,x_flipped) + b1 # matrix-multiply x with input weight w1 and add bias
                    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
                    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
                    y_sigmoid = y.sigmoid() # squash this with a sigmoid function
                    target = y_sigmoid.detach().cpu().numpy()
                    # let's also do a forward pass for the old board; this is the state we will update
                    h = torch.mm(w1,xold_flipped) + b1 # matrix-multiply x with input weight w1 and add bias
                    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
                    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
                    y_sigmoid = y.sigmoid() # squash the output
                    delta2 = 0 + gamma * target - y_sigmoid.detach().cpu().numpy() # this is the usual TD error
                else: # here player 1 updates the neural network (a two-layer feed-forward net with sigmoid units)
                    x = Variable(torch.tensor(one_hot_encoding(board), dtype = torch.float, device = device)).view(28*2*6,1)
                    # now do a forward pass to evaluate the new board's after-state value
                    h = torch.mm(w1,x) + b1 # matrix-multiply x with input weight w1 and add bias
                    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
                    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
                    y_sigmoid = y.sigmoid() # squash this with a sigmoid function
                    target = y_sigmoid.detach().cpu().numpy()
                    # let's also do a forward pass for the old board; this is the state we will update
                    h = torch.mm(w1,xold) + b1 # matrix-multiply x with input weight w1 and add bias
                    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
                    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
                    y_sigmoid = y.sigmoid() # squash the output
                    delta2 = 0 + gamma * target - y_sigmoid.detach().cpu().numpy() # this is the usual TD error
                # use autograd and the constructed computational graph in PyTorch to compute all gradients
                y_sigmoid.backward()
                # update the eligibility traces using the gradients
                Z_w1 = gamma * lam * Z_w1 + w1.grad.data
                Z_b1 = gamma * lam * Z_b1 + b1.grad.data
                Z_w2 = gamma * lam * Z_w2 + w2.grad.data
                Z_b2 = gamma * lam * Z_b2 + b2.grad.data
                # zero the gradients
                w1.grad.data.zero_()
                b1.grad.data.zero_()
                w2.grad.data.zero_()
                b2.grad.data.zero_()
                # perform now the update for the weights
                delta2 =  torch.tensor(delta2, dtype = torch.float, device = device)
                w1.data = w1.data + alpha1 * delta2 * Z_w1
                b1.data = b1.data + alpha1 * delta2 * Z_b1
                w2.data = w2.data + alpha2 * delta2 * Z_w2
                b2.data = b2.data + alpha2 * delta2 * Z_b2

            # we need to keep track of the last board state visited by the players
            if otherplayer == player:
                xold_flipped = Variable(torch.tensor(one_hot_encoding(flipped_agent.flip_board(board)), dtype=torch.float, device = device)).view(28*2*6,1)
            else:
                xold = Variable(torch.tensor(one_hot_encoding(board), dtype=torch.float, device = device)).view(28*2*6,1)
            # swap players
            player = -player
            moveNumber = moveNumber + 1

        # The game episode has ended; we know the outcome and can assign the terminal rewards
        if winner == otherplayer:
            reward = 0
        elif winner == -otherplayer:
            reward = 1
        else:
            reward = 0.5
        # Now we perform the final update (terminal after-state value is zero)
        # these are basically the same updates as in the inner loop, but for the final after-states (xold and xold_flipped)
        
        # First we update the values for player -1
        h = torch.mm(w1,xold_flipped) + b1 # matrix-multiply x with input weight w1 and add bias
        h_sigmoid = h.sigmoid() # squash this with a sigmoid function
        y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
        y_sigmoid = y.sigmoid() # squash the output
        delta = (1.0 - reward) + gamma * 0 - y_sigmoid.detach().cpu().numpy()
        # use autograd and the constructed computational graph in PyTorch to compute all gradients
        y_sigmoid.backward()
        # update the eligibility traces
        Z_w1 = gamma * lam * Z_w1 + w1.grad.data
        Z_b1 = gamma * lam * Z_b1 + b1.grad.data
        Z_w2 = gamma * lam * Z_w2 + w2.grad.data
        Z_b2 = gamma * lam * Z_b2 + b2.grad.data
        # zero the gradients
        w1.grad.data.zero_()
        b1.grad.data.zero_()
        w2.grad.data.zero_()
        b2.grad.data.zero_()
        # perform now the update of weights
        delta =  torch.tensor(delta, dtype = torch.float, device = device)
        w1.data = w1.data + alpha1 * delta * Z_w1
        b1.data = b1.data + alpha1 * delta * Z_b1
        w2.data = w2.data + alpha2 * delta * Z_w2
        b2.data = b2.data + alpha2 * delta * Z_b2
        
        # Then we update the values for player 1
        h = torch.mm(w1,xold) + b1 # matrix-multiply x with input weight w1 and add bias
        h_sigmoid = h.sigmoid() # squash this with a sigmoid function
        y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
        y_sigmoid = y.sigmoid() # squash the output
        delta2 = reward + gamma * 0 - y_sigmoid.detach().cpu().numpy()  # this is the usual TD error
        # use autograd and the constructed computational graph in PyTorch to compute all gradients
        y_sigmoid.backward()
        # update the eligibility traces
        Z_w1 = gamma * lam * Z_w1 + w1.grad.data
        Z_b1 = gamma * lam * Z_b1 + b1.grad.data
        Z_w2 = gamma * lam * Z_w2 + w2.grad.data
        Z_b2 = gamma * lam * Z_b2 + b2.grad.data
        # zero the gradients
        w1.grad.data.zero_()
        b1.grad.data.zero_()
        w2.grad.data.zero_()
        b2.grad.data.zero_()
        # perform now the update of weights
        delta2 =  torch.tensor(delta2, dtype = torch.float, device = device)
        w1.data = w1.data + alpha1 * delta2 * Z_w1
        b1.data = b1.data + alpha1 * delta2 * Z_b1
        w2.data = w2.data + alpha2 * delta2 * Z_w2
        b2.data = b2.data + alpha2 * delta2 * Z_b2
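The epsilon_nn_greedy policy that learnit calls is not included in these snippets. The sketch below shows one plausible implementation, assuming the same two-layer sigmoid network (w1, b1, w2, b2), the one_hot_encoding helper and the global device used elsewhere in these examples; the original exploration scheme may differ in its details.

def epsilon_nn_greedy(board, dice, player, epsilon, w1, b1, w2, b2,
                      possible_moves, possible_boards, debug=False):
    # Assumed sketch: with probability epsilon play a random legal move,
    # otherwise evaluate every after-state with the two-layer sigmoid network
    # (the same forward pass as in the examples above) and play greedily.
    if np.random.uniform() < epsilon:
        return possible_moves[np.random.randint(len(possible_moves))]
    na = len(possible_boards)
    encoded = np.zeros((na, 28 * 2 * 6))
    for j in range(na):
        encoded[j, :] = one_hot_encoding(possible_boards[j])
    x = torch.tensor(encoded.transpose(), dtype=torch.float, device=device)
    h_sigmoid = (torch.mm(w1, x) + b1).sigmoid()          # hidden layer
    va = (torch.mm(w2, h_sigmoid) + b2).sigmoid()         # after-state values, shape (1, na)
    return possible_moves[int(torch.argmax(va).item())]   # greedy move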
Example #10
def play_a_game(opponent, commentary = False):
    board = init_board() # initialize the board
    player = np.random.randint(2)*2-1 # which player begins?
    y_old = 0
    y_old_p2 = 0
    firstMove = True
    firstMove_p2 = True
    pickle_in = open("randommodel.pickle","rb")
    model = pickle.load(pickle_in)
    model = model.cuda()
    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary: print("lets go player ",player)
        
        # roll dice
        dice = roll_dice()
        if commentary: print("rolled dices:", dice)
            
        # make a move (2 moves if the same number appears on the dice)
        for i in range(1+int(dice[0] == dice[1])):
            board_copy = np.copy(board) 

            # make the move (agent vs agent):
            if(opponent == "agent"):
                if player == 1:
                    move, y_old = agent.action(board_copy,dice,player,i, y_old, model, firstMove, True)
                    # update the board
                    if len(move) != 0:
                        for m in move:
                            board = update_board(board, m, player)
                    if(firstMove):
                        firstMove = False
                elif player == -1:
                    flipped_board = flipped_agent.flip_board(board_copy)
                    move, y_old_p2 = agent.action(flipped_board,dice,1,i, y_old_p2, model, firstMove_p2, True)
                    if len(move) != 0:
                        for m in move:
                            flipped_board = update_board(flipped_board, m, 1)
                    board = flipped_agent.flip_board(flipped_board)
            
                    if(firstMove_p2):
                        firstMove_p2 = False
            elif(opponent == "human"):
                pretty_print(board)
                if player == 1:
                    print("Computer's turn")
                    move, y_old = agent.action(board_copy,dice,player,i, y_old, model, firstMove, False)
                    print("Computer's move", move)
                elif player == -1:
                    print("Human's turn")
                    possible_moves, possible_boards = legal_moves(board_copy, dice, player)
                    print("dice:", dice)
                    printing.moves_to_string(possible_moves)
                    text = input("prompt")
                    move = possible_moves[int(text)]

                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)
            #if you're playing vs random agent:
            elif(opponent == "random"):
                if player == 1:
                    move, y_old = agent.action(board_copy,dice,player,i, y_old, model, firstMove, False)
                elif player == -1:
                    move = random_agent(board_copy,dice,player,i)
                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)
            # update the board

            
            # give status after every move:         
            if commentary: 
                print("move from player",player,":")
                pretty_print(board)
        

        # players take turns 
        player = -player


    # return the winner
    winner = -1*player
    if(opponent == "agent"):
        if(winner == 1):
            agent.learn(y_old, model, board_copy, "yes")
            agent.learn(y_old_p2, model, board_copy, "no")
        else:
            agent.learn(y_old, model, board_copy, "no")
            agent.learn(y_old_p2, model, board_copy, "yes")
        
    #print("Winner is player", winner)
    pickle_out = open("randommodel.pickle","wb")
    pickle.dump(model, pickle_out)
    pickle_out.close()
    return winner
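A short usage sketch (names as defined above; the number of games is arbitrary) for estimating the trained agent's win rate against the built-in random opponent:

# run a batch of evaluation games against the random opponent and
# report how often the trained agent (player +1) wins
n_games = 100
wins = 0
for _ in range(n_games):
    wins += int(play_a_game(opponent="random", commentary=False) == 1)
print("win rate vs random agent: {:.1%}".format(wins / n_games))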