Example #1
def action(net, board_copy, dice, player, i):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy

    if player == -1: board_copy = flip_board(board_copy)  ##Flip the board
    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy,
                                                             dice,
                                                             player=1)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    feature_boards = []
    ### Create new features using Tesauro's encoding
    for b in possible_boards:
        feature_boards.append(oneHot(b))

    ### Get probabilities of each action from the actor's forward pass
    probs, log_probs = net.actor.forward(feature_boards)

    ### index array used to sample an action
    index = np.arange(0, len(possible_boards))
    ### detach from the autograd graph and convert to NumPy so the probabilities can be sampled from
    probs = probs.detach().numpy()

    ### sample the index of the chosen action (choice is presumably numpy.random.choice)
    i = choice(index, p=probs)
    move = possible_moves[i]  ### pick the move at the sampled index
    newBoard = possible_boards[i]  ### pick the corresponding after-state board
    newBoardFeatures = oneHot(newBoard)

    ### Critic feedforward
    target, oldtarget = net.critic.forward(
        newBoardFeatures, oneHot(board_copy))  # (newBoardFeatures, getFeatures(board_copy, player))

    R = 0
    if Backgammon.game_over(newBoard):  ### did I win? If so the reward is +1
        R = 1
        target = 0  ### the value of a terminal state is 0

    ### Now we update the neural network

#    target, oldtarget =net.critic.forward(newBoardFeatures,getFeatures(board_copy,player) )

    delta = R + net.gamma * target - oldtarget

    ### Update the critic via backpropagation
    net.critic.backward(R, delta, net.gamma)
    ### Update the actor via backpropagation
    net.actor.backward(log_probs[i], delta, net.gamma)

    if player == -1: move = flip_move(move)  ###Flip the move

    return move
Example #2
 def ExamplePolicy(self):
     _, st = B.legal_moves(B.init_board(), B.roll_dice(), 1)
     st = np.vstack(st)
     st = st[:, 1:]
     out = np.round(
         self._s.run(self._actor_policy, ({
             self._possible_states: st
         })) * 100) / 100
     out = out.flatten()
     out.sort()
     return out[::-1]
    def do(self, board_real, dice, actor_theta, player):
        commentary = False
        print_results = False
        for i in range(0, 25):
            board = np.copy(board_real)
            old_state = np.copy(board_real)
            self.z = np.zeros(198)
            if(len(board) == 0):
                break
            count = 0
            while not Backgammon.game_over(board) and not Backgammon.check_for_error(board):
                if commentary:
                    print("Simulationgame: lets go player ", player)

                dice = Backgammon.roll_dice()
                if commentary:
                    print("Simulationgame: rolled dices:", dice)

                # make a move (2 moves if the same number appears on the dice)
                for rep in range(1 + int(dice[0] == dice[1])):
                    board_copy = np.copy(board)
                    if player == 1:
                        
                        move, new_state = self.nextMove(board_copy, dice, player, actor_theta)
                        
                        
                    elif player == -1:
                        move = agentX.action(board_copy, dice, player, rep)
                    if len(move) != 0:

                        for m in move:
                            board = Backgammon.update_board(board, m, player)
                        if(player == 1 and count > 1):
                            new_state = np.copy(board)  
                            if(not Backgammon.game_over(new_state) and not Backgammon.check_for_error(new_state)):


                                delta = 0 + self.getValue(new_state, actor_theta, player) - self.getValue(old_state, actor_theta, player)
                                self.theta = self.theta + (self.alpha * delta * self.z)
                                self.z = self.lamb * self.z + getFeatures(old_state, player)
                                old_state = new_state

                    if commentary:
                        print("Simulationgame: move from player", player, ":")
                        Backgammon.pretty_print(board)
                player = -player
                count = count + 1   
            if(print_results):
                print("simulation game nr", i)
                Backgammon.pretty_print(board)
            delta = player * -1 + 0 - self.getValue(old_state, actor_theta, player)  # terminal update: the winner is -player, the last to move
            
            self.theta = np.add(self.theta , (self.alpha * delta * self.z))
            self.z = self.lamb * self.z + getFeatures(old_state, player)
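do() never defines getValue, but because the eligibility trace accumulates getFeatures(old_state, player) directly, the value function it implies is linear in those features. A minimal sketch consistent with that TD(λ) update (hypothetical; getFeatures is the same undefined helper the example already calls):

import numpy as np

def getValue(state, theta, player):
    # Hypothetical helper consistent with the linear TD(lambda) update above:
    # v(s) = theta . getFeatures(s, player)
    return float(np.dot(theta, getFeatures(state, player)))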
def learnit(numGames, agent):
    numWins = []
    for g in tqdm(range(numGames)):
        if g % 1000 == 0:
            #print(agent.theta)
            wins = compete(agent)
            numWins.append(wins)

        board = Backgammon.init_board()

        agent.zero_el()
        if (0 == np.random.randint(2)):
            player = 1
        else:
            player = -1

        moveNr = 0
        isGameOver = False

        while (isGameOver == False):
            dice = Backgammon.roll_dice()
            for repeat in range(1 + int(dice[0] == dice[1])):

                action = agent.greedy_action(np.copy(board), dice, player,
                                             repeat)
            for i in range(0, len(action)):
                board = Backgammon.update_board(board, action[i], player)

            R = 0
            if (1 == Backgammon.game_over(board)):
                if (player == 1):
                    R = 1.0
                else:
                    R = 0
                isGameOver = True
            if ((1 < moveNr) & (len(action) > 0)):
                agent.update(player, R, board, isGameOver)

            if (len(action) > 0):
                if player == 1:
                    agent.xold = board
                else:
                    agent.xoldF = flip_board(board)
            player = -player
            moveNr += 1

    x = np.arange(0, numGames, 1000)
    fig = plt.figure()
    #plt.figure(figsize=(30, 30))
    ax = fig.add_subplot(111)
    ax.set_xlabel("Number of games")
    ax.set_ylabel("Wins against a random player")
    ax.plot(x, numWins)
def action(board_copy, dice, player, i):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy

    # starts by flipping the board so that the player always sees himself as player 1
    if player == -1: board_copy = flip_board(board_copy)

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy,
                                                             dice,
                                                             player=1)

    # if there are no moves available, return an empty move
    if len(possible_moves) == 0:
        return []

    # Make the best move:
    # policy missing, returns a random move for the time being
    #
    #
    #
    #
    #
    move = possible_moves[np.random.randint(len(possible_moves))]

    # if the table was flipped the move has to be flipped as well
    if player == -1: move = flip_move(move)

    return move
    def greedy_action(self, board, dice, player, i):
        if player == -1: board = flip_board(board)

        # check out the legal moves available for the throw
        possible_moves, possible_boards = Backgammon.legal_moves(board,
                                                                 dice,
                                                                 player=1)

        # if there are no moves available, return an empty move
        if len(possible_moves) == 0:
            return []

        na = len(possible_boards)
        enc = np.zeros((na, 312))
        for i in range(0, na):
            enc[i, :] = oneHot(possible_boards[i])
        x = Variable(
            torch.tensor(enc.transpose(),
                         dtype=torch.double,
                         device=self.device))

        h = torch.mm(self.w1, x) + self.b1
        h_sigmoid = h.sigmoid()
        y = torch.mm(self.W, h_sigmoid) + self.B
        va = y.sigmoid().detach().cpu()
        action = possible_moves[np.argmax(va)]

        if player == -1: action = flip_move(action)

        return action
Example #7
def action(board, dice, oplayer, i = 0):

    flippedplayer = -1
    if (flippedplayer == oplayer): # view it from player 1 perspective
        board = flipped_agent.flip_board(np.copy(board))
        player = -oplayer # player now the other player +1
    else:
        player = oplayer
    possible_moves, possible_boards = Backgammon.legal_moves(board, dice, player)
    na = len(possible_boards)
    if (na == 0):
        return []
    xa = np.zeros((na,nx+1))
    va = np.zeros((na))
    for j in range(0, na):
        xa[j,:] = one_hot_encoding(possible_boards[j],i)
    x = Variable(torch.tensor(xa.transpose(), dtype = torch.float, device = device))
    # now do a forward pass to evaluate the board's after-state value
    h = torch.mm(w1,x) + b1 # matrix-multiply x with input weight w1 and add bias
    h_sigmoid = h.sigmoid() # squash this with a sigmoid function
    y = torch.mm(w2,h_sigmoid) + b2 # multiply with the output weights w2 and add bias
    va = y.sigmoid().detach().cpu()
    action = possible_moves[np.argmax(va)]
    if (flippedplayer == oplayer): # map this move to right view
        action = flipped_agent.flip_move(action)
    return action
def action(board_copy, dice, player, i):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(
        board_copy, dice, player)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    # make the best move according to the policy: epsilon-greedy over the
    # network's board evaluations
    move = epsilon_nn_greedy(board_copy, player, epsilon, w1, b1, w2, b2,
                             debug)

    return move
Example #9
def main():
    winners = {}
    winners["1"] = 0
    winners["-1"] = 0  # Collecting stats of the games
    nGames = 1000  # how many games?
    arr = np.zeros(nGames)
    for g in range(nGames):
        winner = Backgammon.play_a_game(commentary=False, net=new_agent)
        # print("game %i finished", g)
        # print("winner is ", winner)
        winners[str(winner)] += 1
        arr[g] = winner
        # print("this is hw")
        # print(new_agent.val_func_nn.hidden_weights)
        # print("this is theta")
        # print(new_agent.policy_nn.theta)
        if (g % 50 == 0):
            print(new_agent.torch_nn_policy.theta)
    # print(winners)
    file = open('Failed.py', 'w')
    file.write(np.array_str(arr))
    file.close()
    print("Out of", nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
Example #10
def action(board_copy, dice, player, i, model):
    global actionCount

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(
        board_copy, dice, player)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    #Backgammon.pretty_print(board_copy)

    after_state, action = epsilon_nn_greedy(board_copy, possible_moves,
                                            possible_boards, player, model)
    #model.xtheta = xtheta_mean
    if (actionCount > 0):
        model.updateNeural(after_state)
    if (actionCount > 1):
        model.dynaUpdate()

    actionCount += 1

    model.xold = Variable(
        torch.tensor(one_hot_encoding(after_state),
                     dtype=torch.float,
                     device=model.device)).view((28 * 31, 1))

    return action
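Examples #10 and #12 reshape the encoded after-state to 28 * 31 values but never show the encoder they call. A minimal sketch of one encoding consistent with that shape (an assumption, not the original one_hot_encoding): each of 28 board slots becomes a 31-way one-hot over its signed checker count.

import numpy as np

def one_hot_encoding(board):
    # Hypothetical sketch matching the .view((28 * 31, 1)) used above; assumes
    # board[1:29] holds signed checker counts in [-15, 15] (the 24 points plus
    # the bar and borne-off slots).
    features = np.zeros((28, 31))
    for pos in range(1, 29):
        count = int(board[pos])
        features[pos - 1, count + 15] = 1.0
    return features.flatten()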
Example #11
def action(board_copy, epsilon, dice, player, i):
    if player == -1:
        board_copy = flip_board(board_copy)

    possible_moves, possible_boards = BG.legal_moves(board_copy,
                                                     dice,
                                                     player=1)
    na = len(possible_moves)
    va = np.zeros(na)
    j = 0

    # if there are no moves available
    if na == 0:
        return []
    if (np.random.uniform() < epsilon):
        move = possible_moves[randrange(na)]
        if player == -1:
            move = flip_move(move)
        return move

    for board in possible_boards:
        # encode the board to create the input
        x = Variable(
            torch.tensor(ice_hot_encoding(board),
                         dtype=torch.float,
                         device=device)).view(encSize, 1)
        # now do a forward pass to evaluate the board's after-state value
        va[j] = feed_forward_w(x)
        j += 1
    move = possible_moves[np.argmax(va)]
    if player == -1:
        move = flip_move(move)
    return move
Example #12
def action(board_copy, dice, player, i, model):
    global actionCount
    # starts by flipping the board so that the player always sees himself as player 1
    if player == -1: board_copy = flip_board(board_copy)

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy,
                                                             dice,
                                                             player=1)

    # if there are no moves available, return an empty move
    if len(possible_moves) == 0:
        return []

    # Make the bestmove:
    after_state, action = epsilon_nn_greedy(board_copy, possible_moves,
                                            possible_boards, player, model)
    #model.xtheta = xtheta_mean
    if (actionCount > 0):
        model.updateNeural(after_state)
    if (actionCount > 1):
        model.dynaUpdate()

    actionCount += 1

    model.xold = Variable(
        torch.tensor(one_hot_encoding(after_state),
                     dtype=torch.float,
                     device=model.device)).view((28 * 31, 1))

    # if the table was flipped the move has to be flipped as well
    move = flip_move(action) if player == -1 else action

    return move
Example #13
def main():
    winners = {}
    winners["1"] = 0
    winners["-1"] = 0  # Collecting stats of the games
    nGames = 10000  # how many games?
    arr = np.zeros(nGames)
    for g in tqdm(range(nGames)):

        #        w=new_agent.actor.theta
        #        print(w)

        ### Zero eligibility traces (following the pseudocode)
        agent.actor.zero_el()
        agent.critic.zero_el()

        winner = Backgammon.play_a_game(commentary=False, net=agent)

        winners[str(winner)] += 1
        arr[g] = winner


#        if(g % 100 == 0):
#            print(new_agent.torch_nn_policy.theta)
# print(winners)
#
##Save the agent
    file_net = open('saved_net_one_2', 'wb')
    pickle.dump(agent, file_net)
    file_net.close()

    print("Out of", nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
def main():
    ranges = 1
    winners = {}
    winners["1"] = 0
    winners["-1"] = 0  # Collecting stats of the games
    nGames = 1000   # how many games?
    arr = np.zeros(nGames)
    for g in tqdm(range(nGames)):
        # ## Zero eligibility traces (following the pseudocode)
        winner = Backgammon.play_a_game(commentary=False, net=agent, train=train)
        winners[str(winner)] += 1
        arr[g] = winner             
        if(g % 10 == 0):

            print(agent.actor.theta)
            k = winners["1"]
            print("winrate is %f" % (k / (g + 0.00000001)))
    # print(winners)
    #  Save the agent
    if(train is True):
        file_net = open('saved_net_one', 'wb')
        pickle.dump(agent.actor.theta, file_net)
        file_net.close()
    print("Out of", ranges, nGames, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
Example #15
def action(board_copy,dice,player,i):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy
    
    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy, dice, player)
    
    # if there are no moves available
    if len(possible_moves) == 0: 
        return [] 
    
    # make the best move according to the policy: load the trained weights
    # and choose a move epsilon-greedily
    epsilon = 0.1
    w1 = torch.load('./w1_trained.pth', map_location=lambda storage, loc: storage)
    w2 = torch.load('./w2_trained.pth', map_location=lambda storage, loc: storage)
    b1 = torch.load('./b1_trained.pth', map_location=lambda storage, loc: storage)
    b2 = torch.load('./b2_trained.pth', map_location=lambda storage, loc: storage)
    
    #w1 = torch.load('./w1_trained_first_time_working.pth', map_location=lambda storage, loc: storage)
    #w2 = torch.load('./w2_trained_first_time_working.pth', map_location=lambda storage, loc: storage)
    #b1 = torch.load('./b1_trained_first_time_working.pth', map_location=lambda storage, loc: storage)
    #b2 = torch.load('./b2_trained_first_time_working.pth', map_location=lambda storage, loc: storage)
    move = neural_network_agent.epsilon_nn_greedy(board_copy, dice, player, epsilon, w1, b1, w2, b2, possible_moves, possible_boards, False)

    return move
Example #16
def action(board_copy, dice, player, i):
    global count
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(
        board_copy, dice, player)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    # make the best move according to the policy

    na = len(possible_moves)
    va = np.zeros(na)
    for i in range(0, na):
        move = possible_moves[i]
        board = possible_boards[i]

        # encode the board to create the input
        x = Variable(
            torch.tensor(one_hot_encoding(board),
                         dtype=torch.float,
                         device=device)).view(29, 31)
        # now do a forward pass to evaluate the board's after-state value
        h = torch.mm(
            w1, x) + b1  # matrix-multiply x with input weight w1 and add bias
        h_sigmoid = h.sigmoid()  # squash this with a sigmoid function
        y = torch.mm(
            w2,
            h_sigmoid) + b2  # multiply with the output weights w2 and add bias
        y_sigmoid = y.sigmoid()
        z = torch.mm(y_sigmoid, w3) + b3
        va[i] = z.sigmoid()

    count += 1

    if not Backgammon.game_over(possible_boards[np.argmax(va)]):
        update(possible_boards[np.argmax(va)])
    else:
        reward = 1 if player == 1 else 0
        update(possible_boards[np.argmax(va)], reward)

    return possible_moves[np.argmax(va)]
Example #17
 def legal_moves(self, dice, player):
     moves, boards = B.legal_moves(board=self.board,
                                   dice=dice,
                                   player=player)
     if len(boards) == 0:
         return [], []
     boards = np.vstack(boards)
     return moves, boards
Example #18
 def legal_moves(self, board, dice, player):
     if player == -1:
         board = FA.flip_board(np.copy(board))
     moves, boards = B.legal_moves(board=board, dice=dice, player=1)
     if len(boards) == 0:
         return [], []
     boards = np.vstack(boards)
     return moves, boards
Example #19
def play_a_game_random(commentary=False):
    board = BG.init_board()  # initialize the board
    player = np.random.randint(2) * 2 - 1  # which player begins?
    randomPlayer = -1
    while not BG.game_over(board) and not BG.check_for_error(board):
        if commentary: print("lets go player ", player)

        # roll dice
        dice = BG.roll_dice()
        if commentary: print("rolled dices:", dice)

        # make a move (2 moves if the same number appears on the dice)
        for i in range(1 + int(dice[0] == dice[1])):
            board_copy = np.copy(board)

            if player == randomPlayer:
                move = flipped_agent.action(board_copy, dice, player, i)
            else:
                move = action(board_copy, dice, player, i)

            # update the board
            if len(move) != 0:
                for m in move:
                    board = BG.update_board(board, m, player)

            # give status after every move:
            if commentary:
                print("move from player", player, ":")
                BG.pretty_print(board)

        # players take turns
        player = -player

    # return the winner
    return -1 * player
Example #20
def evaluate(agent, evaluation_agent, n_eval, n_games):
    wins = 0
    for i in range(n_eval):
        winner, board = Backgammon.play_a_game(agent, evaluation_agent)
        wins += int(winner == 1)
    winrate = round(wins / n_eval * 100, 3)
    print("Win-rate after training for " + str(n_games) + " games: " +
          str(winrate) + "%")
    return winrate
Example #21
    def PlayPubEval(self, test_games=1):
        wins = []

        for _ in range(test_games):

            env = backgammon()
            done = False

            while not done:
                dice = B.roll_dice()
                for _ in range(1 + int(dice[0] == dice[1])):

                    possible_moves, possible_boards = env.legal_moves(dice, 1)
                    n_actions = len(possible_moves)

                    if n_actions == 0:
                        break

                    action = self.sample_action(possible_boards)
                    old_board, new_board, reward, done = env.step(
                        possible_moves[action], player=1)

                    if done:
                        break

                if not done:
                    #env.swap_player()
                    dice = B.roll_dice()

                    for __ in range(1 + int(dice[0] == dice[1])):
                        action = pubeval.agent_pubeval(np.copy(env.board),
                                                       dice,
                                                       oplayer=-1)
                        old_board, new_board, reward, done = env.step(
                            action, player=-1)
                        if B.check_for_error(env.board):
                            raise RuntimeError("PubEvalErBilað: pubeval produced an illegal board")
                        if done:
                            reward = 0
                            break
            #env.swap_player()
            wins.append(float(reward == 1))

        return (np.mean(wins))
Example #22
def e_legal_moves(board, dice, player=1):
    moves, boards = B.legal_moves(board, dice=dice, player=player)
    if len(boards) == 0:
        return [], features(board, player)
    n_boards = np.shape(boards)[0]
    tesauro = np.zeros((n_boards, 198))
    for b in range(n_boards):
        tesauro[b, :] = features(boards[b], player)
    tesauro = np.array(tesauro)
    return moves, tesauro
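e_legal_moves expects features(board, player) to return 198 values per board; 198 is the size of Tesauro's classic TD-Gammon encoding, so a hedged sketch along those lines is given below. The unit ordering and the board's index/sign conventions are assumptions, not taken from this example.

import numpy as np

def features(board, player):
    # Hedged sketch of a Tesauro-style 198-unit encoding (an assumption based
    # on the 198 in e_legal_moves). Assumes board[1:25] are the 24 points with
    # signed counts, board[25]/board[26] the bars and board[27]/board[28] the
    # borne-off checkers.
    f = np.zeros(198)
    for point in range(1, 25):
        for sign, offset in ((1, 0), (-1, 96)):  # 4 units per point per player
            n = sign * board[point]
            base = offset + (point - 1) * 4
            if n >= 1:
                f[base] = 1.0
            if n >= 2:
                f[base + 1] = 1.0
            if n >= 3:
                f[base + 2] = 1.0
            if n > 3:
                f[base + 3] = (n - 3) / 2.0
    f[192] = abs(board[25]) / 2.0   # player +1 checkers on the bar
    f[193] = abs(board[26]) / 2.0   # player -1 checkers on the bar
    f[194] = abs(board[27]) / 15.0  # player +1 checkers borne off
    f[195] = abs(board[28]) / 15.0  # player -1 checkers borne off
    f[196] = float(player == 1)     # whose turn it is
    f[197] = float(player == -1)
    return f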
Example #23
def action(net, board_copy, dice, player, i, learn=True):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy

    if player == -1: board_copy = flip_board(board_copy)  ##Flip the board
    # check out the legal moves available for the throw
    if (player == 1):
        xold = net.xold
        net.xnew = board_copy
    else:  # player == -1: use the flipped-board bookkeeping
        xold = net.xFlipOld
        net.xFlipNew = board_copy

    possible_moves, possible_boards = Backgammon.legal_moves(board_copy,
                                                             dice,
                                                             player=1)

    # if there are no moves available
    if len(possible_moves) == 0:
        return []

    one_hot = []
    for b in possible_boards:
        one_hot.append(oneHot(b))

    if learn:
        if not net.firstMove:
            net.update(player)

    m, xtheta = net.actor(one_hot, possible_moves)
    if player == 1:
        net.xtheta = xtheta
    else:
        net.flipxtheta = xtheta

    move = possible_moves[m]
    newBoard = possible_boards[m]

    #    if learn:
    #        if not net.firstMove:
    #            net.update(player)

    if player == -1: move = flip_move(move)  ###Flip the move

    if player == 1:
        net.xold = board_copy
    else:
        net.xFlipOld = board_copy
        net.firstMove = False

    return move
Example #24
 def step(self, move, player=1):
     old_board = np.copy(self.board)
     if len(move) != 0:
         for m in move:
             self.board = B.update_board(board=self.board,
                                         move=m,
                                         player=player)
     reward = 0
     self.done = False
     if self.iswin():
         reward = player
         self.done = True
     return old_board, np.copy(self.board), reward, self.done
Example #25
def action(board_copy, dice, player, i, learning=False):
    if player == -1:
        board_copy = flip_board(board_copy)

    # Get every possible move and board
    xtheta_mean = torch.zeros((len(theta), 1))
    possible_moves, possible_boards = BG.legal_moves(board_copy,
                                                     dice,
                                                     player=1)
    na = len(possible_moves)
    one_hot_boards = np.zeros((2 * (n - 1) * 7, na))
    j = 0
    # if there are no moves available
    if len(possible_moves) == 0:
        x = Variable(
            torch.tensor(ice_hot_encoding(board_copy),
                         dtype=torch.float,
                         device=device)).view(2 * (n - 1) * 7, 1)
        h_sigmoid = feed_forward_th(x)
        pi = torch.mm(theta, h_sigmoid).softmax(0)
        xtheta_mean = h_sigmoid * pi.item()
        if learning == True:
            return [], xtheta_mean
        else:
            return []

    for board in possible_boards:
        # encode the board to create the input for the NN
        x = Variable(
            torch.tensor(ice_hot_encoding(board),
                         dtype=torch.float,
                         device=device)).view(2 * (n - 1) * 7, 1)
        one_hot_boards[:, j] = x[:, 0]
        j += 1
    # select the move from a distribution
    X = Variable(torch.tensor(one_hot_boards, dtype=torch.float,
                              device=device))
    h = feed_forward_th(X)
    h_sigmoid = h.sigmoid()
    pi = torch.mm(theta, h_sigmoid).softmax(1)
    xtheta_mean = torch.sum(torch.mm(h_sigmoid, torch.diagflat(pi)), 1)
    xtheta_mean = torch.unsqueeze(xtheta_mean, 1)
    move_index = torch.multinomial(pi, num_samples=1)
    move = possible_moves[move_index]
    if player == -1:
        move = flip_move(move)

    if learning == True:
        return move, xtheta_mean

    return move
Example #26
    def PlayRandomAgent(self, test_games=1):
        wins = []

        for _ in range(test_games):

            env = backgammon()
            done = False

            while not done:
                dice = B.roll_dice()
                for __ in range(1 + int(dice[0] == dice[1])):

                    possible_moves, possible_boards = env.legal_moves(dice, 1)
                    n_actions = len(possible_moves)

                    if n_actions == 0:
                        break

                    action = self.sample_action(possible_boards)
                    old_board, new_board, reward, done = env.step(
                        possible_moves[action])

                    if done:
                        break

                if not done:
                    dice = B.roll_dice()

                    for _ in range(1 + int(dice[0] == dice[1])):
                        old_board, new_board, reward, done = env.make_move(
                            dice)
                        if done:
                            reward = 0
                            break

            wins.append(float(reward == 1))

        return np.mean(wins)
def action(board_copy, dice, player, i):
    # the champion to be
    # inputs are the board, the dice and which player is to move
    # outputs the chosen move according to its policy
    move = []

    # check out the legal moves available for the throw
    possible_moves, possible_boards = Backgammon.legal_moves(board_copy, dice)

    # make the best move according to the policy
    if len(possible_moves) != 0:
        move = policy(possible_moves, possible_boards, dice, i)

    return move
Example #28
def action(board_copy, dice, player, i):
    
    if player == -1:
        board_copy = FA.flip_board(np.copy(board_copy))
    possible_moves, possible_boards = B.legal_moves(board_copy, dice, 1)
    
    if len(possible_moves) == 0:
        return []
    
    action = AgentJ.sample_action(np.vstack(possible_boards))
    move = possible_moves[action]
    if player == -1:
        move = FA.flip_move(move)
    return move
def compete(agent):
    winners = {}
    winners["1"] = 0
    winners["-1"] = 0
    for g in range(100):

        board = Backgammon.init_board()

        if (0 == np.random.randint(2)):
            player = 1
        else:
            player = -1

        isGameOver = False
        while (isGameOver == False):
            dice = Backgammon.roll_dice()
            for repeat in range(1 + int(dice[0] == dice[1])):
                if (player == -1):
                    action = Backgammon.random_agent(np.copy(board), dice,
                                                     player, repeat)
                else:
                    action = agent.greedy_action(np.copy(board), dice, player,
                                                 repeat)
                for i in range(0, len(action)):
                    board = Backgammon.update_board(board, action[i], player)
            if (1 == Backgammon.game_over(board)):
                winner = player
                isGameOver = True
                break
            player = -player
        winners[str(winner)] += 1

# numWins.append(winners["1"])
    print("Out of", 100, "games,")
    print("player", 1, "won", winners["1"], "times and")
    print("player", -1, "won", winners["-1"], "times")
    return winners["1"]
Example #30
def epsilon_nn_greedy(board, player, epsilon, w1, b1, w2, b2, debug=False):
    moves = Backgammon.legal_moves(board)
    if np.random.uniform() < epsilon:
        if debug is True:
            print("explorative move")
        return np.random.choice(moves, 1)
    na = np.size(moves)
    va = np.zeros(na)
    for i in range(0, na):
        board[moves[i]] = player
        # encode the board to create the input

        # FEATURES are the input x (the board encoding is left unfinished here)

        # va[i] = y.sigmoid()
    return moves[np.argmax(va)]
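The board evaluation in this last example is left blank. Below is a hedged sketch of the missing step, reusing the two-layer forward pass that example #7 already shows (assumptions: w1, b1, w2, b2 are torch tensors with a single output unit, and one_hot_encoding is the encoder the other examples call; none of this is defined by the stub itself).

import numpy as np
import torch

def evaluate_afterstates(possible_boards, w1, b1, w2, b2):
    # Hedged sketch: score each candidate after-state with the same
    # sigmoid network used in example #7.
    va = np.zeros(len(possible_boards))
    for i, board in enumerate(possible_boards):
        x = torch.tensor(one_hot_encoding(board), dtype=torch.float).view(-1, 1)
        h_sigmoid = (torch.mm(w1, x) + b1).sigmoid()  # hidden layer
        y = torch.mm(w2, h_sigmoid) + b2              # output layer
        va[i] = y.sigmoid().item()                    # after-state value in (0, 1)
    return va

The greedy choice is then moves[np.argmax(va)], as in the stub's final line.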