Example #1
import random

import numpy as np

import cccc  # Connect Four helpers: available_moves(), winner()


def alpha_beta_move(board, active_turn, depth, alpha=2):
    # Negamax search over a 6x7 Connect Four board (0 empty, 1/-1 the players),
    # pruning against the single bound `alpha`.
    swap_dict = {1: -1, -1: 1}       # switch the player to move
    dummy_board = np.copy(board)     # work on a copy, never mutate the caller's board
    options = cccc.available_moves(board)
    random.shuffle(options)          # break ties between equal moves randomly

    # Only one legal move left: play it and report a win (1) or not (0).
    if len(options) == 1:
        dummy_board[np.where(dummy_board[:, options[0]] == 0)[0][-1],
                    options[0]] = active_turn
        if cccc.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)

    # Search horizon reached: no evaluation function in this version, so any move scores 0.
    if depth == 0:
        return (0, options[np.random.randint(len(options))] + 1)

    best_value = -2
    candidate_move = None
    for x in options:
        height = np.where(dummy_board[:, x] == 0)[0][-1]  # lowest empty row in column x
        dummy_board[height, x] = active_turn
        if cccc.winner(dummy_board):
            return (1, x + 1)
        # Opponent's best reply; our score is the negation of theirs.
        (opp_value, opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn],
                                                depth - 1, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # The opponent will never allow this line: prune.
            break
        dummy_board[height, x] = 0   # undo the trial move

    return (best_value, candidate_move)
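
A minimal usage sketch for this example (not part of the original), assuming the cccc module provides available_moves() and winner() for a 6x7 NumPy board of 0 (empty), 1 and -1 (the two players), as the code above implies; the returned column index is 1-based.

import numpy as np

# Hedged sketch: empty board, player 1 to move, searching 4 plies deep.
empty_board = np.zeros((6, 7))
value, column = alpha_beta_move(empty_board, 1, depth=4)
print(value, column)   # value is 1 for a found win, otherwise 0 or -1; column is 1-based
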
Example #2

import random

import numpy as np

import cccc  # Connect Four helpers: available_moves(), winner()


def alpha_beta_move(board,
                    active_turn,
                    depth,
                    evaluation=lambda x: 0,
                    alpha=2):
    swap_dict = {1: -1, -1: 1}                 # switch the player to move
    dummy_board = np.copy(board)
    dummy_board = dummy_board.reshape((6, 7))  # accept a flat (42,) or a (6, 7) board
    options = cccc.available_moves(dummy_board)
    random.shuffle(options)                    # break ties between equal moves randomly
    if len(options) == 1:
        dummy_board[np.where(dummy_board[:, options[0]] == 0)[0][-1],
                    options[0]] = active_turn
        if cccc.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)
    # Search horizon reached: score each candidate with the supplied evaluation function.
    if depth == 0:
        best_value = -2
        candidate_move = None  # guard against evaluations that never beat -2
        for x in options:
            height = np.where(dummy_board[:, x] == 0)[0][-1]
            dummy_board[height, x] = active_turn
            eval_board = evaluation(dummy_board * active_turn)
            if eval_board > best_value:
                best_value = eval_board
                candidate_move = x + 1
            dummy_board[height, x] = 0
        return (best_value, candidate_move)

    best_value = -2
    candidate_move = None
    for x in options:
        height = np.where(dummy_board[:, x] == 0)[0][-1]
        dummy_board[height, x] = active_turn
        if cccc.winner(dummy_board):
            return (1, x + 1)
        (opp_value,
         opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn],
                                     depth - 1, evaluation, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # The opponent will never allow this line: prune.
            break
        dummy_board[height, x] = 0

    return (best_value, candidate_move)
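
The evaluation argument is only assumed to map a 6x7 board (already multiplied by the side to move) to a number; a hypothetical plug-in heuristic, purely for illustration, might weight pieces toward the center columns:

import numpy as np

# Hypothetical heuristic (not from the original project): the board arrives as
# board * active_turn, so the side to move is always +1 from its own viewpoint.
def center_weight_eval(board):
    weights = np.array([1, 2, 3, 4, 3, 2, 1]) / 100.0  # small per-column weights, keeps values inside (-1, 1)
    return float(np.sum(board * weights))               # positive favors the side to move

# value, column = alpha_beta_move(board, 1, depth=4, evaluation=center_weight_eval)
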
Example #3
import random

import numpy as np

import cccc  # Connect Four helpers: available_moves(), winner()


def get_max_future(future_board, value_fun):
    # Try every legal reply for player -1 and return the minimum value together
    # with the board after that reply.
    options = cccc.available_moves(future_board)
    move_values = np.zeros(7)
    for move in options:
        dummy_board = np.copy(future_board)
        dummy_board[np.where(dummy_board[:, move] == 0)[0][-1], move] = -1
        if cccc.winner(dummy_board):
            move_values[move] = cccc.winner(dummy_board)
        else:
            # value_fun expects the board flattened to shape (1, 42).
            reshapable = np.copy(dummy_board).reshape(1, 42)
            move_values[move] = value_fun(reshapable)

    available_move_values = np.array([move_values[move] for move in options])
    dummy_board = np.copy(future_board)
    options_index = np.argmin(available_move_values)
    dummy_board[np.where(dummy_board[:, options[options_index]] == 0)[0][-1],
                options[options_index]] = -1
    return np.amin(available_move_values), dummy_board
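
A usage sketch for get_max_future (same assumptions about cccc as above), with a stand-in for value_fun, which is only required to accept a 1x42 array and return a scalar:

import numpy as np

def dummy_value_fun(flat_board):
    return 0.0   # stand-in: rates every position as neutral

board = np.zeros((6, 7))
best_value, next_board = get_max_future(board, dummy_value_fun)
print(best_value)   # minimum value over player -1's one-ply replies
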
def alpha_beta_move(board, active_turn, depth, evaluation=lambda x: 0, alpha=2):
    swap_dict = {1: -1, -1: 1}                 # switch the player to move
    dummy_board = np.copy(board)
    dummy_board = dummy_board.reshape((6, 7))  # accept a flat (42,) or a (6, 7) board
    options = cccc.available_moves(dummy_board)
    random.shuffle(options)                    # break ties between equal moves randomly

    # Only one legal move left: play it and report a win (1) or not (0).
    if len(options) == 1:
        dummy_board[np.where(dummy_board[:, options[0]] == 0)[0][-1],
                    options[0]] = active_turn
        if cccc.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)

    # Search horizon reached: score each candidate with the supplied evaluation function.
    if depth == 0:
        best_value = -2
        candidate_move = None  # guard against evaluations that never beat -2
        for x in options:
            height = np.where(dummy_board[:, x] == 0)[0][-1]
            dummy_board[height, x] = active_turn
            eval_board = evaluation(dummy_board * active_turn)
            if eval_board > best_value:
                best_value = eval_board
                candidate_move = x + 1
            dummy_board[height, x] = 0
        return (best_value, candidate_move)

    best_value = -2
    candidate_move = None
    for x in options:
        height = np.where(dummy_board[:, x] == 0)[0][-1]  # lowest empty row in column x
        dummy_board[height, x] = active_turn
        if cccc.winner(dummy_board):
            return (1, x + 1)
        # Opponent's best reply; our score is the negation of theirs.
        (opp_value, opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn],
                                                depth - 1, evaluation, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # The opponent will never allow this line: prune.
            break
        dummy_board[height, x] = 0   # undo the trial move

    return (best_value, candidate_move)
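
Unlike Example #1, this version reshapes its input, so it accepts either a flat length-42 array or an already-shaped 6x7 board; a small sketch of both call styles (same assumptions about cccc as above):

import numpy as np

flat = np.zeros(42)        # flat board is reshaped to (6, 7) internally
grid = np.zeros((6, 7))    # an already-shaped board passes through the reshape unchanged
print(alpha_beta_move(flat, 1, 3))
print(alpha_beta_move(grid, 1, 3))
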
Example #5
import random

import numpy as np

import cccc  # Connect Four helpers: available_moves()


def value_move(board, active_turn, output_fun, exploration=0):
    # Epsilon-greedy move choice: with probability `exploration` play a random
    # legal move, otherwise play the move whose resulting position scores best
    # under output_fun.
    board = board.reshape((1, 42))

    dummy_board = active_turn * board[:]   # view the position from the active player's side
    options = cccc.available_moves(dummy_board)

    if exploration > random.random():
        move = random.choice(options)
    else:
        move_values = np.zeros(42)
        for move in options:
            dummy_board = active_turn * board[:]
            dummy_board[0][move] = 1
            # The board is negated before scoring and the score negated back
            # (negamax-style use of output_fun).
            move_values[move] = -1 * output_fun(-1 * dummy_board)

        available_move_values = np.array([move_values[move] for move in options])
        move = options[available_move_values.argmax(-1)]
    return move + 1
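
A usage sketch for value_move with a stand-in scoring function; it assumes, as the code above does, that cccc.available_moves accepts the flattened 1x42 board and that output_fun maps such an array to a scalar. The exploration argument is the epsilon of an epsilon-greedy policy.

import numpy as np

def dummy_output_fun(flat_board):
    return 0.0   # stand-in: rates every position as neutral

board = np.zeros(42)   # flat board, as this example expects
column = value_move(board, 1, dummy_output_fun, exploration=0.1)
print(column)          # chosen move, offset by +1 as in the other examples
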
Example #6
import random
from math import inf

import numpy as np

import cccc  # Connect Four helpers: available_moves(), winner(), is_full(), game_over()

# update_move(board, x, turn) and unupdate_move(board, x) are helpers defined
# elsewhere in this project: they drop a piece in column x and take it back.


def alpha_beta_move(board, turn, depth=0,
                    alpha=(-inf, -inf), beta=(inf, inf),
                    evaluation=lambda x: 0):
    # Negamax with alpha-beta pruning.  Values are (score, depth) tuples so that,
    # between equal scores, a win found with more remaining depth (i.e. sooner)
    # compares as better.
    dummy_board = np.copy(board)  # we don't want to change the board state

    swap_player = {1: -1, -1: 1}  # so we can change whose turn it is
    options = cccc.available_moves(board)  # get legal moves
    random.shuffle(options)  # should inherit move order instead of randomizing

    best_value = (-inf, -inf)

    if not options:
        print(board, cccc.game_over(board))
        print('oops, no available moves')
    cand_move = options[0]

    if depth == 0:
        # Horizon: score every move with the static evaluation function.
        for x in options:
            update_move(dummy_board, x, turn)
            op_value = (evaluation(dummy_board * swap_player[turn]), depth)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            if alpha >= beta:
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
    else:
        for x in options:
            update_move(dummy_board, x, turn)

            if cccc.winner(dummy_board):  # should check over and tied too
                return ((inf, depth), x)

            if cccc.is_full(dummy_board):  # this assumes you can't lose on your turn
                return ((0, depth), x)

            op_value, _ = alpha_beta_move(dummy_board,
                                          swap_player[turn],
                                          depth - 1,
                                          alpha=tuple(-1 * el for el in beta),
                                          beta=tuple(-1 * el for el in alpha),
                                          evaluation=evaluation)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            if alpha >= beta:
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)

    return (best_value, cand_move)
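
A usage sketch for this variant: values come back as (score, depth) tuples, update_move and unupdate_move must be supplied by the surrounding project, and cccc needs winner(), is_full(), game_over() and available_moves() as used above.

import numpy as np
from math import inf

board = np.zeros((6, 7))
(score, at_depth), column = alpha_beta_move(board, turn=1, depth=6)
if score == inf:
    print('forced win playing column', column)
else:
    print('best score', score, 'playing column', column)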