Example #1
    def expand(self, node):
        # print("expanded")
        # call on an expandable node to create one more child node -- we add the child to the leaf
        # then we update the current node we are at to this leaf node

        # choose an action -- choose randomly
        action_index = randint(0, len(node.untried_actions) - 1)

        action = node.untried_actions[action_index]
        # remove that action from the untried action list
        node.untried_actions.remove(action)

        # create the new node to be added to the game tree
        child = self.create_node(node.board,
                                 Board.get_opp_piece_type(node.colour), action,
                                 node)

        # apply the chosen move (made by the parent's colour) to the child's board
        child.board.update_board(action, node.colour)
        child.update_actions()

        # add this child to the parent's child list
        node.add_child(child)
        self.num_nodes += 1
        return child
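
Example #1 only covers the expansion step of MCTS. For context, a selection step that walks down to a node that expand() can be called on might look like the sketch below; the wins, visits and children attributes and the exploration constant are assumptions about the node class, not taken from the code above.

    def select(self, node, c=1.4):
        # Hedged sketch: UCT-style descent to an expandable node.  Assumes every
        # fully-expanded child has been visited at least once (visits > 0).
        from math import log, sqrt
        while len(node.untried_actions) == 0 and len(node.children) > 0:
            # follow the child with the highest UCT value
            node = max(node.children,
                       key=lambda ch: ch.wins / ch.visits +
                       c * sqrt(log(node.visits) / ch.visits))
        return node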
Example #2
    def simulate(self, node):
        start_time = time()
        # simulate the state based on the actions of the child
        # this is the board on which we do our simulation
        board = deepcopy(node.board)
        available_actions = board.update_actions(board, node.colour)
        colour = node.colour
        num_moves = 0

        while board.is_terminal() is False:
            if len(available_actions) == 0:
                # there are no actions to take and therefore it is a forfeit
                action = None
            else:
                # pick a random move -- actions
                action_ind = randint(0, len(available_actions) - 1)
                action = available_actions[action_ind]

            # apply the action to the board
            board.update_board(action, colour)
            #board.print_board()
            # update the colour of the piece
            colour = Board.get_opp_piece_type(colour)
            # update the new available actions list -- this action list represents the
            # actions of next player -- this is the player that will make the next move
            available_actions = board.update_actions(board, colour)
        end_time = time()

        # now we are at a terminal state, we need to find out who has won

        if board.winner == node.colour:
            return 1
        elif board.winner == Board.get_opp_piece_type(node.colour):
            return -1
        elif board.winner is None:
            return 0
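
Example #2 returns a reward of +1, -1 or 0 from the simulated node's point of view, but the step that propagates that reward back up the tree is not shown. A minimal backpropagation sketch, assuming each node keeps visits, wins and parent attributes (assumptions, not part of the source), could be:

    def backpropagate(self, node, reward):
        # walk back to the root, flipping the sign of the reward at each level
        # because parent and child belong to opposing colours
        while node is not None:
            node.visits += 1
            node.wins += reward
            reward = -reward
            node = node.parent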
Example #3
class Player:
    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()

        self.opponent = self.board.get_opp_piece_type(self.colour)

    def update(self, action):

        # update the board based on the action of the opponent
        # get move type
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)

        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                raise InvalidAction

            direction = self.board.convert_coord_to_direction(
                action[0], action[1])
            self.board.update_board((action[0], direction), self.opponent)

    def action(self, turns):
        available_actions = self.board.update_actions(self.colour)
        available_actions.sort()

        for i, action in enumerate(available_actions):
            print(str(i) + " : " + str(action))

        print("+" * 50)
        index = int(input("Enter move for {}: ".format(self.colour)))
        next_move = available_actions[index]
        print("+" * 50)

        print(self.board.move_counter)
        if self.board.phase == constant.PLACEMENT_PHASE:

            # making moves during the placement phase
            self.board.update_board(next_move, self.colour)
            return next_move
        else:
            new_pos = self.board.convert_direction_to_coord(
                next_move[0], next_move[1])
            # making moves during the moving phase
            self.board.update_board(next_move, self.colour)
            return next_move[0], new_pos
Example #4
    def __init__(self, board, colour):
        # we want to create a node

        self.tt = TranspositionTable()

        # search on a private copy of the board so the caller's board is never modified
        self.board = deepcopy(board)

        # alpha-beta bounds -- stored as attributes rather than passed through every call
        self.alpha = -inf
        self.beta = inf

        # defines the colours of min and max
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # move-ordering bookkeeping used by iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0
        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")
Example #5
class Player:
    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()

        self.opponent = self.board.get_opp_piece_type(self.colour)

    def update(self, action):

        # update the board based on the action of the opponent
        # get move type
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)

        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                raise InvalidAction

            direction = self.board.convert_coord_to_direction(
                action[0], action[1])
            self.board.update_board((action[0], direction), self.opponent)
        # print("UPDATE BOARD _______________________________")
        # print(self.board)
        # print("UPDATE BOARD _______________________________")

    def action(self, turns):
        available_actions = self.board.update_actions(self.colour)
        next_action = available_actions[randint(0, len(available_actions) - 1)]

        if self.board.phase == constant.PLACEMENT_PHASE:
            # making moves during the placement phase
            self.board.update_board(next_action, self.colour)
            # print(next_action)
            return next_action
        else:
            new_pos = self.board.convert_direction_to_coord(
                next_action[0], next_action[1])
            # making moves during the moving phase
            self.board.update_board(next_action, self.colour)
            return next_action[0], new_pos
Example #6
    def negascout(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    #print("FOUND PV")
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        #print("FOUND FAIL SOFT")
                        alpha = tt_value

                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        #print("FOUND FAIL HARD")
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, tt_best_move

        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)
        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None

        if move_to_try is not None and move_to_try in actions:
            #print("MOVE ORDERING")
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]
        # print(len(actions))

        # start negascout here
        for i, action in enumerate(favourable):
            # skip over the best action in the tt table
            if action == move_to_try and i > 0:
                continue

            elim = self.board.update_board(action, colour)

            # if we are at the first node -- this is the best node we have found so far
            # therefore we do a full search on this node
            if i == 0:
                # do a full search on the best move found so far
                score, _ = self.negascout(depth - 1, -beta, -alpha, opponent)
                score = -score

            else:
                # assume the first move searched is the best one; probe each remaining
                # move with a null-window search.  If the probe fails low, the first move
                # really is better and we keep its bound.  If the probe "fails high"
                # (the score lands between alpha and beta), re-search the node with a
                # full window to obtain its exact minimax value.

                # do the null window search
                score, _ = self.negascout(depth - 1, -alpha - 1, -alpha,
                                          opponent)
                score = -score

                # if it failed high, then we just do a full search to find the actual best move
                if alpha < score < beta:
                    score, _ = self.negascout(depth - 1, -beta, -score,
                                              opponent)
                    score = -score

            # record the best value and action found so far
            if best_val < score:
                best_val = score
                best_action = action

            # reset alpha
            if alpha < score:
                alpha = score

            # undo the action applied to the board -- we can now apply another move to the board
            self.undo_actions(action, colour, elim)

            # test for alpha beta cutoff
            if alpha >= beta:
                break

        # store the values in the transposition table
        if best_val <= original_alpha:
            # fail-low: best_val is only an upper bound on the true value
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # fail-high: best_val is a lower bound on the true value
            tt_type = constant.TT_LOWER
        else:
            # exact minimax value (PV node)
            tt_type = constant.TT_EXACT

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action
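
The Player classes below call itr_negamax() / itr_negascout(), which are not included in these examples. A hedged sketch of such an iterative-deepening driver, reusing the timing and depth attributes from Example #4 (the starting depth, the millisecond budget taken from time_alloc, and the existing math.inf import are assumptions):

    def itr_negascout(self):
        # allocate the time budget for this move (milliseconds, assumed)
        self.time_rem = self.time_alloc
        self.time_start = self.curr_millisecond_time()

        best_move = None
        depth = 1
        try:
            while depth <= self.depth:
                val, move = self.negascout(depth, -inf, inf, self.player)
                # keep only results from fully completed iterations
                best_move = move
                self.eval_depth = depth
                self.minimax_val = val
                depth += 1
        except TimeOut:
            # negascout raised TimeOut part-way through an iteration;
            # fall back to the deepest fully searched move
            pass

        return best_move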
Example #7
    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)
Example #8
class Player:
    def __init__(self, colour):
        # set the colour of the player
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()

        # set up the minimax search strategy -- NEGAMAX
        self.minimax = Negamax(self.board, self.colour, "/eval_weights")

        # set the colour of the opponent
        self.opponent = self.board.get_opp_piece_type(self.colour)

        # set up the mini-max return values
        self.depth_eval = 0
        self.minimax_val = 0
        self.policy_vector = 0

        # initialise the action book
        self.action_book = ActionBook(self.colour)

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            # update board also returns the pieces of the board that will be eliminated
            self.board.update_board(action, self.opponent)
            self.minimax.update_board(self.board)

        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                print("ERROR: action is not a tuple")
                return

            # get the "to" square direction using the provided positions
            move_type = self.board.convert_coord_to_direction(
                action[0], action[1])

            # update the player board representation with the action
            self.board.update_board((action[0], move_type), self.opponent)

    def action(self, turns):

        # update the negamax/minimax board representation
        self.minimax.update_board(self.board)

        # reset the move counter at the start of the moving phase
        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        # check the action book for a stored response to the current board state
        board_state = self.board.board_state
        if self.board.phase == constant.PLACEMENT_PHASE:
            action = self.action_book.check_state(board_state)

            # check if the action is legal
            if action is not None and self.board.check_free_square(
                    action) is True:
                # return the action found and update the board representations
                self.board.update_board(action, self.colour)
                self.minimax.update_board(self.board)
                return action

        # if the action book has no entry for this state, fall back to a negamax search
        best_move = self.minimax.itr_negamax()

        self.depth_eval = self.minimax.eval_depth
        self.minimax_val = self.minimax.minimax_val

        # once we have found the best move we must apply it to the board representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move
        else:
            # if we are in moving phase, return the correctly formatted positions
            if best_move is None:
                self.board.update_board(best_move, self.colour)
                self.minimax.update_board(self.board)
                return None

            new_pos = Board.convert_direction_to_coord(best_move[0],
                                                       best_move[1])
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move[0], new_pos
    def negamax(self, depth, alpha, beta, colour):

        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        move_to_try = None
        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")

        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:

            # get the value mappings from the dictionary
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, then the move
            # we should try first is this best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                # this is the PV node therefore this is the best move that we have found so far
                if tt_type == constant.TT_EXACT:
                    return tt_value, tt_best_move

                # the minimax value in the transposition table is a lower bound to the search
                elif tt_type == constant.TT_LOWER:
                    if tt_value > alpha:
                        alpha = tt_value

                # the stored value is an upper bound on the true value -- use it to tighten beta
                elif tt_type == constant.TT_UPPER:
                    if tt_value < beta:
                        beta = tt_value

                # test for cutoff -- return the best move found so far
                if alpha >= beta:
                    return tt_value, tt_best_move

        # obtain the actions and sort them
        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the negamax search
        best_val = -inf
        best_action = None

        # if we have found a best action to take in the transposition table, this should be the first
        # move we should try -- put this at the start of the list of actions
        if move_to_try is not None and move_to_try in actions:
            # put the move to try at the first position -- therefore it will be searched first
            actions = [move_to_try] + actions

        i = 0
        # split the action list into favourable and unfavourable actions -- we only
        # search the favourable ones when the list is long enough to be worth pruning
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]

        # iterate only through the favourable moves so the branching factor stays
        # manageable, while keeping enough moves to preserve most of the decision
        # quality of negamax/minimax rather than degenerating into a purely greedy choice
        for action in favourable:

            # skip over the best action from the transposition table -- it has already been searched
            if action == move_to_try and i != 0:
                continue
            i += 1

            # update the board, record the eliminated pieces from that update
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score
            # undo the action applied to the board
            self.undo_action(action, colour, elim)

            # get the best score and action so far
            if score > best_val:
                best_val = score
                best_action = action

            # update alpha if needed
            if best_val > alpha:
                alpha = best_val

            # test for cut off
            if alpha >= beta:
                break

        # store the values in the transposition table
        if best_val <= original_alpha:
            # then this is an upper bound
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # if the best value we have found is a lower bound
            tt_type = constant.TT_LOWER
            # print("LOWER")
        else:
            # this is the PV node value
            tt_type = constant.TT_EXACT

        # add the entry to the transposition table
        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action
class Player:

    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # TODO -- need to see if this works correctly

        self.minimax = Negascout(self.board, self.colour)

        self.opponent = self.board.get_opp_piece_type(self.colour)

        self.depth_eval = 0
        self.minimax_val = 0
        self.policy_vector = 0

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            # update board also returns the pieces of the board that will be eliminated
            self.board.update_board(action, self.opponent)
            # self.board.eliminated_pieces[self.opponent]
            self.minimax.update_board(self.board)

        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                print("ERROR: action is not a tuple")
                return

            direction = self.board.convert_coord_to_direction(action[0], action[1])

            # update the player board representation with the action
            self.board.update_board((action[0], direction), self.opponent)
            self.minimax.update_board(self.board)

    def action(self, turns):
        self.minimax.update_board(self.board)

        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        # find the best move
        best_move = self.minimax.itr_negascout()
        # if the best move we have found so far is a Forfeit -- return this
        if best_move is None:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return None

        self.depth_eval = self.minimax.eval_depth
        self.minimax_val = self.minimax.minimax_val

        # once we have found the best move we must apply it to the board representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            # print(best_move)
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move
        else:
            new_pos = Board.convert_direction_to_coord(best_move[0], best_move[1])
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move[0], new_pos
    def negamax(self, depth, alpha, beta, colour):
        # Timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)

        # for evaluation
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions = self.board.update_actions(colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player,
                                      actions) * dic[colour]
            return val, None

        # do the minimax search
        best_val = -inf
        best_action = None
        for action in actions:
            # print("THIS CALL--------")
            # print(self.board)
            # print("THIS CALL--------")
            # if self.board.phase == constant.MOVING_PHASE:
            #     piece = self.board.get_piece(action[0])
            #     direction = action[1]
            #     if piece.is_legal_move(direction) is False:
            #         print(actions)
            #         print(self)
            #         print("WHYYYYYYYYYYYYYY--------------------------------------------")
            #         print(action[0], direction, colour)
            #         print(piece)
            #         print(piece.get_legal_actions())

            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            self.undo_action(action, colour, elim)

            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            if alpha >= beta:
                break

        return best_val, best_action
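
The searches above rely on cutoff_test() and evaluate_state(), which are not included in these examples. The sketches below show plausible shapes for them; the is_terminal() check mirrors Example #2, while the delegation to a hypothetical evaluation.evaluate() method is an assumption about the missing code.

    def cutoff_test(self, depth):
        # stop the recursion when the depth budget is spent or the game has ended
        return depth == 0 or self.board.is_terminal()

    def evaluate_state(self, board, colour, actions):
        # placeholder: delegate to the learned evaluation weights loaded in __init__;
        # evaluate() is a hypothetical method name -- the real interface is not shown.
        # The negamax/negascout caller multiplies the result by +1/-1 for the side to move.
        return self.evaluation.evaluate(board, colour, actions)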