Example #1
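The listing below relies on a few standard-library imports plus several project-specific modules (constant, Board, Evaluation, TranspositionTable) and custom exceptions that are not part of this excerpt. The sketch that follows spells out those assumptions; the project module paths and the exception definitions are illustrative guesses, not code from the original project.

# Assumed standard-library imports for this listing.
from copy import deepcopy
from math import inf
from time import time

# Project-specific modules assumed by the classes below -- the paths are hypothetical:
#   import constant                                     # piece, phase and TT-entry-type constants
#   from board import Board                             # board with update/undo/sort helpers
#   from evaluation import Evaluation                   # weighted evaluation function
#   from transposition_table import TranspositionTable  # see the interface sketch further down


class TimeOut(Exception):
    """Raised when the per-move time budget is exhausted (assumed definition)."""
    pass


class ReturnUnfavourableMove(Exception):
    """Raised when no favourable actions are available (assumed definition)."""
    pass
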
class Negascout(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0
        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    * Negascout -- Iterative Deepening Driver Function
    '''

    def itr_negascout(self):
        colour = self.player
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we want to evaluate a move in placement phase
            # this is to limit the size of growth
            self.tt.clear()

            # set the max depth iterations based on the phase that we are in
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(colour)
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

            # if we have used most of the total time budget or the game has gone past 120 moves,
            # reduce the time allocated per move
            if self.total_time > 90000 or self.board.move_counter > 120:
                self.time_alloc = 500
                # if we are near the final shrinking phase, then we can decrease the time it has to
                # evaluate
                if self.board.move_counter > 150:
                    self.time_alloc = 150

        best_depth = 1
        val, move = 0, None
        best_move = None
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:

                self.time_start = self.curr_millisecond_time()
                val, move = self.negascout(depth, -inf, inf, self.player)
                # move = self.negascout(depth,self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                if move is not None and move in action_set:
                    best_move = move

            except TimeOut:
                print("TIMEOUT")
                break
        # add the time allocated to the total time
        self.total_time += self.time_alloc
        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negascout(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move
            #print(move_to_try)
            #print("FOUND ENTRY IN TT")

            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    #print("FOUND PV")
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        #print("FOUND FAIL SOFT")
                        alpha = tt_value

                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        #print("FOUND FAIL HARD")
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, tt_best_move

        actions = self.board.update_actions(colour)
        # actions = actions_1
        actions = self.board.sort_actions(actions, colour)
        #actions = actions_1
        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None

        if move_to_try is not None and move_to_try in actions:
            #print("MOVE ORDERING")
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        i = 0
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]
        # print(len(actions))

        # start negascout here
        for i, action in enumerate(favourable):
            # skip over the best action in the tt table
            if action == move_to_try and i > 0:
                continue

            elim = self.board.update_board(action, colour)

            # if we are at the first node -- this is the best node we have found so far
            # therefore we do a full search on this node
            if i == 0:
                # do a full search on the best move found so far
                score, _ = self.negascout(depth - 1, -beta, -alpha, opponent)
                score = -score

            else:
                # assume that the first move is the best move we have found so far;
                # to verify this we do a null window search on the remaining moves
                # if the null window search fails low, the first move really was the best,
                # and the search simply returns that move
                # but if the search "failed high" -- i.e. the score falls between alpha and beta --
                # we need to do a full re-search of the node to work out the true minimax value
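                # concrete example: with alpha = 3 and beta = 10, the null window call below is
                # negascout(depth - 1, -4, -3, opponent); after negation, a score of 3 or less
                # confirms the first move is still best, while a score strictly between 3 and 10
                # "fails high" and triggers the full-window re-search further down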

                # do the null window search
                score, _ = self.negascout(depth - 1, -alpha - 1, -alpha,
                                          opponent)
                score = -score

                # if it failed high, then we just do a full search to find the actual best move
                if alpha < score < beta:
                    score, _ = self.negascout(depth - 1, -beta, -score,
                                              opponent)
                    score = -score

            # get the best value and score
            if best_val < score:
                best_val = score
                best_action = action

            # reset alpha
            if alpha < score:
                alpha = score

            # undo the action applied to the board -- we can now apply another move to the board
            self.undo_actions(action, colour, elim)

            # test for alpha beta cutoff
            if alpha >= beta:
                break

        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upper bound -- the search failed low
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            tt_type = constant.TT_EXACT
            # print("EXACT")

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * TODO: check whether this function ends up evaluating nodes at the root state because of the undo moves
            -- need to test this somehow; other than that the algorithm is working as intended
            -- some optimisations of the algorithm are still needed
    '''

    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_actions(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
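
Every search class in this listing relies on a TranspositionTable exposing clear(), contains(), get_entry(), add_entry() and a size attribute, but its implementation is not shown. The sketch below is a minimal illustration of that assumed interface, keyed on the board string and colour and storing (value, entry type, best move, depth) tuples the way the search code unpacks them; it is not the original implementation (which also appears to take the game phase into account).

# Minimal sketch of the assumed TranspositionTable interface -- illustration only.
class TranspositionTableSketch:
    def __init__(self):
        self.table = {}

    @property
    def size(self):
        # number of stored positions
        return len(self.table)

    def clear(self):
        self.table.clear()

    def contains(self, board_str, colour, phase=None):
        # return the key tuple if the position is stored, otherwise None;
        # key[0] is the board string, matching how the search code uses it
        key = (board_str, colour)
        return key if key in self.table else None

    def get_entry(self, board_str, colour):
        # entry layout: (value, tt_type, best_move, depth)
        return self.table[(board_str, colour)]

    def add_entry(self, board_state, colour, value, tt_type, best_move, depth):
        # the search passes board_state as a byte string, so decode it to match contains()
        if isinstance(board_state, (bytes, bytearray)):
            board_state = board_state.decode("utf-8")
        self.table[(board_state, colour)] = (value, tt_type, best_move, depth)
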
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
            #if self.board.phase == constant.PLACEMENT_PHASE:
            self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        self.time_alloc = 5000

        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}
        '''
        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str,colour,phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str,colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            #print("FOUND ENTRY IN TT")
            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    #print("FOUND PV")
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        #print("FOUND FAIL SOFT")
                        alpha = tt_value

                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        #print("FOUND FAIL HARD")
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, None
        '''
        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player)  #*dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None
        actions = self.board.update_actions(self.board, colour)
        '''
        if move_to_try is not None and move_to_try in actions:
            #print("MOVE ORDERING")
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions
        i = 0
        '''
        # get the favourable moves of the board
        # (get_favourable_actions is assumed to filter self.available_actions for promising
        #  moves -- its definition is not part of this listing)
        actions = self.get_favourable_actions(self.available_actions)
        # if there are no favourable actions to iterate on - raise
        if len(actions) == 0:
            raise ReturnUnfavourableMove

        for action in actions:
            # skip over the best action in the tt table
            '''
            if action == move_to_try and i!= 0:
                continue
            i+=1
            '''
            self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break
        '''
        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upperbound -FAILHARD
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            tt_type = constant.TT_EXACT
            # print("EXACT")
        '''
        # add the entry to the transposition table
        # self.tt.add_entry(self.board.board_state,colour,best_val,tt_type,best_action, depth)
        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * TODO: check whether this function ends up evaluating nodes at the root state because of the undo moves
            -- need to test this somehow; other than that the algorithm is working as intended
            -- some optimisations of the algorithm are still needed
    '''

    def evaluate_state(self, board, colour):
        #return Evaluation.basic_policy(board,colour)
        return self.evaluation.evaluate(board, self.player)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
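
The Evaluation objects constructed in these classes load weight files from ./XML and expose an evaluate(board, colour[, actions]) method that returns a scalar utility; the weighted implementation itself is not part of this listing. The stand-in below only illustrates that interface, using the simple piece-count difference that appears commented out in the last evaluate_state of this listing -- everything about it is an assumption.

# Minimal stand-in for the assumed Evaluation interface -- illustration only.
class EvaluationSketch:
    def __init__(self, path, weight_file):
        # a real implementation would load feature weights from path + weight_file
        self.path = path
        self.weight_file = weight_file

    def evaluate(self, board, colour, actions=None):
        # simple material difference (white minus black), scored from `colour`'s point of view
        material = len(board.white_pieces) - len(board.black_pieces)
        return material if colour == constant.WHITE_PIECE else -material
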
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        #if self.board.phase == constant.PLACEMENT_PHASE:
        self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        print(len(available_actions))
        action_set = set(available_actions)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        # self.time_alloc = 5000
        # time allocated per move in ms
        self.time_alloc = 0
        total = 120000
        if self.board.phase == constant.PLACEMENT_PHASE:
            #self.time_alloc = (total/2 - self.time_alloc) / (24 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 1000
        else:
            #self.time_alloc = (total - self.time_alloc) / (100 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 1000
        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        # iterative deepening begins here
        best_move = None
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                # if the search returned a move, always keep the most recent one that is legal
                # because we are doing a greedy search it sometimes returns an illegal move (cause unclear),
                # so here we also check that the move is legal before keeping it
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited)  -- No Transposition Table
    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions_1 = self.board.update_actions(self.board, colour)
        # print(len(actions))
        actions = self.board.sort_actions(actions_1, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None

        # generate legal actions
        #actions = self.board.update_actions(self.board, colour)
        # split the actions into favourable and unfavourable
        # if there are more than 8 actions, only look through the first 8 'favourable'
        # actions; otherwise evaluate all possible actions
        # THIS IS A GREEDY APPROACH TO MINIMAX THAT LIMITS OUR BRANCHING FACTOR OF THE GAME
        if len(actions) > 8:
            favourable = actions[:8]
        else:
            favourable = actions
        # got here
        #print("got here")
        # depth reduction factor for non-favourable moves
        # note: as written, the loop below only iterates over the favourable moves,
        # so the reduced-depth branch is never actually taken
        R = 2
        #print(favourable)
        #self.board.print_board()
        for action in favourable:

            self.board.update_board(action, colour)
            if action in favourable:
                score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            else:
                score, temp = self.negamax(depth - 1 - R, -beta, -alpha,
                                           opponent)

            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    '''
    * TODO: check whether this function ends up evaluating nodes at the root state because of the undo moves
            -- need to test this somehow; other than that the algorithm is working as intended
            -- some optimisations of the algorithm are still needed
    '''

    def evaluate_state(self, board, colour, actions):
        #return Evaluation.basic_policy(board,colour)
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
class Negamax(object):
    def __init__(self, board, colour, file_name):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # timing attributes
        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0

        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    Iterative Deepening Negamax 
    
    This implements a time-cutoff such that search is terminated once we have reached the allocated time for evaluation.
    
    IT RETURNS THE BEST MOVE IT HAS FOUND IN THE TIME ALLOCATED 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we want to evaluate a move in placement phase
            # this is to limit the size of growth
            self.tt.clear()

            # set the max depth iterations based on the phase that we are in
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)

        # if there are no available actions, we just return None -- this is a forfeit
        if len(available_actions) == 0:
            return None

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

            # if we have used most of the total time budget or the game has gone past 120 moves,
            # reduce the time allocated per move
            if self.total_time > 90000 or self.board.move_counter > 120:
                self.time_alloc = 500
                # if we are near the final shrinking phase, then we can decrease the time it has to
                # evaluate
                if self.board.move_counter > 150:
                    self.time_alloc = 190

        best_depth = 1
        val, move = 0, None

        # set the time remaining for each move evaluation
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            # get the best move until cut off is reached
            try:

                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                # update the time remaining
                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)

                best_depth += 1
            except TimeOut:
                break

        # add the total time to the time allocated
        self.total_time += self.time_alloc

        # print(self.total_time)
        print(best_depth - 1)

        self.eval_depth = best_depth - 1
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    # get the current time in milliseconds
    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    '''
    NEGAMAX DRIVER FUNCTION -- THIS IMPLEMENTS THE FOLLOWING:
        - NEGAMAX WITH A TRANSPOSITION TABLE 
        - MOVE ORDERING USING THE BEST MOVE WE HAVE FOUND SO FAR (IF IT EXISTS IN THE TRANSPOSITION TABLE) 
        - MOVE ORDERING OF THE MOVES WE THINK TO BE FAVOURABLE USING A LIGHTWEIGHT EVALUATION FUNCTION 
        - SELECTING ONLY THE TOP FAVOURABLE MOVES TO EVALUATE USING MINIMAX -- THIS IS HEAVY GREEDY PRUNING 
          APPLIED TO NEGAMAX DESIGNED SUCH THAT WE ONLY LOOK AT MOVES THAT WE THINK WILL PRODUCE A GOOD OUTCOME,
          THUS PRUNING ANY MOVES THAT HAVE A HIGH CHANCE OF HAVING NO EFFECT ON THE GAME-STATE UTILITY.
    '''

    def negamax(self, depth, alpha, beta, colour):

        # print(self.board.board_state)

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:

            # get the value mappings from the dictionary
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                # this is the PV node therefore this is the best move that we have found so far
                if tt_type == constant.TT_EXACT:
                    return tt_value, tt_best_move

                # the minimax value in the transposition table is a lower bound to the search
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        alpha = tt_value

                # the value in the table corresponds to a beta cutoff and therefore it is an upper bound for beta
                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        beta = tt_value

                # test for cutoff -- return the best move found so far
                if alpha >= beta:
                    return tt_value, tt_best_move

        # obtain the actions and sort them
        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the negamax search
        best_val = -inf
        best_action = None

        # if we have found a best action to take in the transposition table, this should be the first
        # move we should try -- put this at the start of the list of actions
        if move_to_try is not None and move_to_try in actions:
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        i = 0
        # split the list of actions into favourable and unfavourable actions
        # we only search the favourable actions if the action list is long enough
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]

        # iterate only through the favourable moves, keeping the number of moves manageable
        # the aim is to reduce the branching factor as much as we can while still having
        # enough moves to evaluate that we retain some of the optimality of negamax/minimax
        # decision making rather than falling back to a purely greedy approach.
        # print(len(favourable))
        for action in favourable:

            # skip over the best action from the tt -- this action has already been searched
            if action == move_to_try and i != 0:
                continue
            i += 1

            # update the board, record the eliminated pieces from that update
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score
            # undo the action applied to the board
            self.undo_action(action, colour, elim)

            # get the best score and action so far
            if score > best_val:
                best_val = score
                best_action = action

            # update alpha if needed
            if best_val > alpha:
                alpha = best_val

            # test for cut off
            if alpha >= beta:
                break

        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upper bound
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # if the best value we have found is a lower bound
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            # this is the PV node value
            tt_type = constant.TT_EXACT

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    # cut-off test -- either depth is zero or the board is at terminal state
    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    # evaluate the game state
    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    # update the negamax board representation for another search
    def update_board(self, board):
        self.board = deepcopy(board)

    # terminal state check
    def is_terminal(self):
        return self.board.is_terminal()

    # undo board wrapper
    def undo_action(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
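
Before a result is stored, the negamax above classifies it against the original search window: a fail-low result is only an upper bound on the true value, a beta cutoff gives a lower bound, and anything strictly inside the window is exact. The short self-contained illustration below walks through that classification with the TT type constants stubbed locally; it is an explanatory sketch, not code from the project.

# Illustration of the bound classification used when storing negamax results
# in the transposition table (constants stubbed locally for this example).
TT_UPPER, TT_LOWER, TT_EXACT = "UPPER", "LOWER", "EXACT"

def classify_tt_entry(best_val, original_alpha, beta):
    if best_val <= original_alpha:
        # the search failed low: best_val is only an upper bound on the true value
        return TT_UPPER
    if best_val >= beta:
        # beta cutoff: best_val is a lower bound on the true value
        return TT_LOWER
    # the value lies strictly inside the original (alpha, beta) window, so it is exact
    return TT_EXACT

# with an original window of (-10, 10): 3 is exact, -12 is an upper bound, 15 is a lower bound
assert classify_tt_entry(3, -10, 10) == TT_EXACT
assert classify_tt_entry(-12, -10, 10) == TT_UPPER
assert classify_tt_entry(15, -10, 10) == TT_LOWER
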
class Negamax(object):
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # only use this board to complete the search
        # save memory
        self.board = deepcopy(board)

        # for alpha beta search -- instead of passing it into the function calls we can use this
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    * Alpha Beta - Minimax Driver Function 
    '''

    def itr_negamax(self):
        # clear the transposition table every time we make a new move -- this is to ensure that it doesn't grow too big
        # if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
        #if self.board.phase == constant.PLACEMENT_PHASE:
        self.tt.clear()

        # update the root number of pieces every time we do a search on a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)

        action_set = set(available_actions)
        # self.actions_leftover = self.board.update_actions(self.board, self.player)

        if len(available_actions) == 0:
            return None
        #else:
        # lets just set the default to the first move
        #    move = available_actions[0]

        # time allocated per move in ms
        '''
        self.time_alloc = 0
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = (30000 - self.time_alloc) / (24 - self.board.move_counter)
        else:
            self.time_alloc = (30000 - self.time_alloc) / (100 - self.board.move_counter)
        '''

        # self.time_alloc = 5000
        # time allocated per move in ms
        self.time_alloc = 0
        total = 120000
        if self.board.phase == constant.PLACEMENT_PHASE:
            #self.time_alloc = (total/2 - self.time_alloc) / (24 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 3000
        else:
            #self.time_alloc = (total - self.time_alloc) / (100 - self.board.move_counter)
            #total -= self.time_alloc
            self.time_alloc = 800

        # get time
        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None

        # iterative deepening begins here
        best_move = None
        # maximum iterative-deepening depth -- not set in the original listing;
        # 10 is assumed here to match the other variants above
        MAX_ITER = 10

        for depth in range(1, MAX_ITER):

            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                self.time_rem = self.time_alloc - (self.time_end -
                                                   self.time_start)
                print(move)
                best_depth += 1

                # if the search returned a move, always keep the most recent one that is legal
                # because we are doing a greedy search it sometimes returns an illegal move (cause unclear),
                # so here we also check that the move is legal before keeping it
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                break

        self.eval_depth = best_depth

        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited)  -- No Transposition Table
    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)

        # for evaluation
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions = self.board.update_actions(colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None
        #print(self.board)
        #print(actions)
        #print(self.board.white_pieces)
        # print(self.board.black_pieces)
        # generate legal actions
        # actions = self.board.update_actions(colour)
        # print("THESE ACTIONS----------------")
        # print(actions)
        # print(self.board)
        # print("*"*30)
        for action in actions:
            # print("THIS CALL--------")
            # print(self.board)
            # print("THIS CALL--------")
            # if self.board.phase == constant.MOVING_PHASE:
            #     piece = self.board.get_piece(action[0])
            #     direction = action[1]
            #     if piece.is_legal_move(direction) is False:
            #         print(actions)
            #         print(self)
            #         print("WHYYYYYYYYYYYYYY--------------------------------------------")
            #         print(action[0], direction, colour)
            #         print(piece)
            #         print(piece.get_legal_actions())

            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            self.undo_action(action, colour, elim)

            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True

        if self.is_terminal():
            return True

        return False

    def evaluate_state(self, board, colour, actions):
        # return len(self.board.white_pieces) - len(self.board.black_pieces)
        return self.evaluation.evaluate(board, colour, actions)

    # update the available moves of the search algorithm after it has been instantiated
    #
    # def update_available_moves(self, node, available_moves):
    #    node.available_moves = available_moves

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_action(self, action, colour, elim_pieces):

        self.board.undo_action(action, colour, elim_pieces)