Example #1
import signal
import time

# Project-local dependencies assumed available: Strings_board,
# MonteCarloSearchTree, heuristics, and TimeoutException.

class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This class does not necessarily use the best data structures for the
    approach you want to use.
    """
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.tree = MonteCarloSearchTree(nb_rows, nb_cols)
        self.nodes = self.tree.tree['nodes']
        self.moves = []
        self.mctsmoves = 0
        self.heuristicmoves = 0
        self.mcts = True
        self.times_for_move = []
        # odds[r] = 2*r + 1 is the internal board row holding the vertical lines of grid row r.
        self.odds = [i for i in range(120) if i % 2 != 0]

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """
        Register action played in game.
        :param row:
        :param columns:
        :param orientation: "v" or "h"
        :param player: 1 or 2
        """
        # Horizontal lines live on even internal rows, vertical lines on odd ones.
        if orientation == "h":
            y = column
            x = row * 2
        else:
            y = column
            x = self.odds[row]
        self.board.fill_line(x, y)
        self.moves.append(
            str(row) + "," + str(column) + "," + str(orientation))
        node = self.tree.fill_line(
            self.nodes,
            str(row) + "," + str(column) + "," + str(orientation))
        if node is not False:
            self.nodes = node['children']

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation)
        """
        start_time = time.time()
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None
        signal.alarm(self.timelimit)
        try:
            (s, value) = self.tree.get_best_move_for_set(self.moves.copy())
            if not isinstance(s, str) or not self.mcts or s in self.moves:
                (a, b) = heuristics.find_good_move(self.board)
                signal.alarm(0)
                self.mcts = False
                if a % 2 == 0:
                    # Even internal row: horizontal line.
                    o = "h"
                    c = b
                    r = int(a / 2)
                else:
                    # Odd internal row: vertical line.
                    o = "v"
                    c = b
                    r = self.odds.index(a)
                self.heuristicmoves += 1
                elapsed_time = time.time() - start_time
                self.times_for_move.append(elapsed_time)
                return r, c, o
            else:
                signal.alarm(0)
                self.mctsmoves += 1
                # split() yields strings; convert row and column back to ints.
                r, c, o = s.split(",")
                elapsed_time = time.time() - start_time
                self.times_for_move.append(elapsed_time)
                return int(r), int(c), o
        except TimeoutException:
            (a, b) = heuristics.find_good_move(self.board)
            signal.alarm(0)
            self.mcts = False
            if a % 2 == 0:
                o = "h"
                c = b
                r = int(a / 2)
            else:
                o = "v"
                c = b
                r = self.odds.index(a)
            self.heuristicmoves += 1
            elapsed_time = time.time() - start_time
            self.times_for_move.append(elapsed_time)
            return r, c, o

    def end_game(self):
        # Report the average time spent per move (guard against an empty list).
        if self.times_for_move:
            total = sum(self.times_for_move)
            print("avg time v3 =", total / len(self.times_for_move))
        self.ended = True
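The signal.alarm calls and the TimeoutException caught in next_action assume that a SIGALRM handler raising that exception has been installed elsewhere in the project. A minimal sketch of that wiring, under that assumption (the handler name is illustrative):

import signal

class TimeoutException(Exception):
    """Raised when the per-move time limit expires."""
    pass

def _alarm_handler(signum, frame):
    # Invoked by the OS when the alarm set with signal.alarm() fires.
    raise TimeoutException()

# Install the handler once, before the agent starts playing.
signal.signal(signal.SIGALRM, _alarm_handler)

Note that signal.alarm and SIGALRM are only available on Unix-like platforms.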
Example #2
import time

# Project-local dependencies assumed available: Strings_board and heuristics.

class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This class does not necessarily use the best data structures for the
    approach you want to use.
    """
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.times_for_move = []

        # odds[r] = 2*r + 1 is the internal board row holding the vertical lines of grid row r.
        self.odds = [i for i in range(120) if i % 2 != 0]

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """
        Register action played in game.
        :param row:
        :param columns:
        :param orientation: "v" or "h"
        :param player: 1 or 2
        """
        # Horizontal lines live on even internal rows, vertical lines on odd ones.
        if orientation == "h":
            y = column
            x = row * 2
        else:
            y = column
            x = self.odds[row]
        self.board.fill_line(x, y)

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation)
        """
        start_time = time.time()
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None
        (a, b) = heuristics.find_good_move(self.board)
        if a % 2 == 0:
            o = "h"
            c = b
            r = int(a / 2)
        else:
            o = "v"
            c = b
            r = self.odds.index(a)
        elapsed_time = time.time() - start_time
        self.times_for_move.append(elapsed_time)
        return r, c, o

    def end_game(self):
        # Report the average time spent per move (guard against an empty list).
        if self.times_for_move:
            total = sum(self.times_for_move)
            print("avg time v3 =", total / len(self.times_for_move))
        self.ended = True
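All of these agents expose the same five-method interface described in the class docstring. A hypothetical driver loop, not part of the original framework, would exercise it roughly like this (the parameters and the single-player self-play are assumptions for illustration):

# Hypothetical driver; the real framework supplies moves from both players.
agent = DotsAndBoxesAgent(player=1, nb_rows=3, nb_cols=3, timelimit=5)
while True:
    action = agent.next_action()
    if action is None:
        break  # board is full
    row, column, orientation = action
    # Every line that gets played is reported back to the agent.
    agent.register_action(row, column, orientation, player=1)
agent.end_game()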
Example #3
import signal

# Project-local dependencies assumed available: Strings_board, Coins_strings_board,
# heuristics, abv1 (alpha-beta search), and TimeoutException.

class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This class does not necessarily use the best data structures for the
    approach you want to use.
    """
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.depth = 5
        self.board2 = Coins_strings_board(nb_rows + 1, nb_cols + 1)
        self.maxnumberofmoves = len(self.board.get_potential_moves())
        self.numberofmovesdone = 0

        # evens[r] = 2*r and odds[r] = 2*r + 1 convert grid rows/columns to internal indices.
        self.evens = [i for i in range(120) if i % 2 == 0]
        self.odds = [i for i in range(120) if i % 2 != 0]

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """
        Register action played in game.
        :param row:
        :param columns:
        :param orientation: "v" or "h"
        :param player: 1 or 2
        """
        # Update the strings board (even internal rows horizontal, odd rows vertical).
        if orientation == "h":
            y = column
            x = row * 2
        else:
            y = column
            x = self.odds[row]
        self.board.fill_line(x, y)
        # Mirror the move on the coins-and-strings board, which also records the player.
        if orientation == "h":
            a = self.evens[row]
            b = self.odds[column]
        else:
            a = self.odds[row]
            b = self.evens[column]
        self.board2.fill_line(a, b, player)
        self.numberofmovesdone += 1

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation)
        """
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None

        # Early game (less than 60% of lines played): answer with the heuristic only.
        if self.numberofmovesdone / self.maxnumberofmoves * 100 < 60:
            (a, b) = heuristics.heuristic(self.board)
            signal.alarm(0)
            if a % 2 == 0:
                o = "h"
                c = b
                r = int(a / 2)
            else:
                o = "v"
                c = b
                r = self.odds.index(a)
            return r, c, o
        # Start the timer: once `timelimit` seconds pass, SIGALRM is delivered and
        # the handler raises TimeoutException, which the except clause below catches.
        signal.alarm(self.timelimit)
        try:
            # self.depth is grown after successful searches and shrunk after
            # timeouts; the original code passed a fixed depth of 10 here.
            (a, b, score) = abv1.alphabeta(self.board2,
                                           self.depth,
                                           player=list(self.player)[0])
            signal.alarm(0)
            self.depth += 1
            if a % 2 == 0:
                x = self.odds.index(b)
                y = self.evens.index(a)
                return (y, x, "h")
            else:
                y = self.odds.index(a)
                x = self.evens.index(b)
                return (y, x, "v")
        except TimeoutException:
            # Search ran out of time: shrink the depth and fall back to the heuristic.
            self.depth -= 1
            (a, b) = heuristics.heuristic(self.board)
            signal.alarm(0)
            if a % 2 == 0:
                o = "h"
                c = b
                r = int(a / 2)
            else:
                o = "v"
                c = b
                r = self.odds.index(a)
            return r, c, o

    def end_game(self):
        self.ended = True
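Every example converts between the framework's (row, column, orientation) moves and an internal line index where horizontal lines sit on even rows and vertical lines on odd rows. Isolated as standalone helpers (the function names are illustrative, not part of the project), the mapping used above is:

def to_internal(row, column, orientation):
    # Horizontal line in grid row r -> internal row 2*r; vertical line -> 2*r + 1.
    x = row * 2 if orientation == "h" else row * 2 + 1
    return x, column

def from_internal(x, y):
    # Even internal rows hold horizontal lines, odd internal rows vertical lines.
    orientation = "h" if x % 2 == 0 else "v"
    return x // 2, y, orientation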
Example #4
import itertools as it

# Project-local dependencies assumed available: Strings_board,
# MonteCarloSearchTree, and heuristics.

class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This class does not necessarily use the best data structures for the
    approach you want to use.
    """
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.tree = MonteCarloSearchTree(nb_rows, nb_cols)
        self.nodes = self.tree.tree['nodes']
        self.moves = []
        self.mctsmoves = 0
        self.heuristicmoves = 0
        self.mcts = True
        self.shouldnotstartwith = []
        # odds[r] = 2*r + 1 is the internal board row holding the vertical lines of grid row r.
        self.odds = [i for i in range(120) if i % 2 != 0]

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """
        Register action played in game.
        :param row:
        :param columns:
        :param orientation: "v" or "h"
        :param player: 1 or 2
        """
        if(orientation == "h"):
            y = column
            x = row*2
        else:
            y = column
            x = self.odds[row]
        self.board.fill_line(x,y)
        self.moves.append(str(row)+","+str(column)+","+str(orientation))
        print(self.moves)
        # node = self.tree.fill_line(self.nodes,str(row)+","+str(column)+","+str(orientation))
        # if node != False:
        #     self.nodes = node['children']

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation)
        """
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None
        move = False
        if self.mcts:
            value = 0
            # Try every ordering of the move history against the MCTS tree,
            # skipping orderings that start with a prefix already known to fail.
            for l in it.permutations(self.moves, len(self.moves)):
                go = True
                for seq in self.shouldnotstartwith:
                    li = list(l)
                    if li[:len(seq)] == seq:
                        go = False
                        break
                if go:
                    (newmove, rate) = self.tree.get_best_move_for_set(list(l).copy())
                    if rate > value and newmove not in self.moves:
                        value = rate
                        move = newmove
                        self.mctsmoves += 1
                        # split() yields strings; convert row and column back to ints.
                        r, c, o = move.split(",")
                        return int(r), int(c), o
                    else:
                        self.shouldnotstartwith.append(list(l))
                print("SHOULDNTO", self.shouldnotstartwith)
                print("MCTS POWER")
        if not isinstance(move, str) or not self.mcts:
            (a, b) = heuristics.find_good_move(self.board)
            self.mcts = False
            if a % 2 == 0:
                o = "h"
                c = b
                r = int(a / 2)
            else:
                o = "v"
                c = b
                r = self.odds.index(a)
            self.heuristicmoves += 1
            return r, c, o

    def end_game(self):
        print("HEURISTIC MOVES:",self.heuristicmoves)
        print("MCTS MOVES:",self.mctsmoves)
        self.ended = True
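The loop in next_action above combines permutation enumeration with prefix pruning: any ordering of the move history that starts with a previously rejected prefix is skipped. Isolated as a helper (the function name is illustrative, not part of the project), the idea looks like this:

import itertools as it

def orderings_to_try(moves, bad_prefixes):
    # Yield only the orderings of `moves` that do not start with a known-bad prefix.
    for ordering in it.permutations(moves, len(moves)):
        ordering = list(ordering)
        if any(ordering[:len(prefix)] == prefix for prefix in bad_prefixes):
            continue
        yield ordering

Enumerating all orderings of the move history grows factorially with the number of moves played, which is why the agent falls back to the heuristic once the tree lookup stops yielding new moves.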
Example #5
from itertools import combinations

import numpy as np

# Project-local dependencies assumed available: Strings_board and qlearning.
# RW and gamma are numeric constants assumed to be defined at module level in
# the original project.

def train(nb_rows, nb_cols):
    board = Strings_board(nb_rows, nb_cols)
    edges = []
    # Mark every line on the board as filled and remember its (i, j) position.
    for i, row in enumerate(board.board):
        for j, val in enumerate(row):
            board.board[i][j] = True
            edges.append((i, j))
    for m in range(nb_rows * nb_cols):

        board_states = combinations(edges, m)

        if m == 0:  # end states: the board is completely full
            board_num = str(qlearning.board2num(board.board))
            max_score = (nb_rows * nb_cols)

            Qedge = [dict()]
            Q = [[dict() for i in range(max_score + 1)]]
            for score in range(max_score + 1):
                Q[m][score][board_num] = RW * (score - (max_score // 2))

        else:
            # initialise a new row in the Q-value lookup table
            Q.append(
                [dict() for i in range(max_score - int(np.floor(m // 4)))])
            Qedge.append(dict())
            for board_state in board_states:
                # construct board from board state

                for edge in board_state:
                    i, j = edge
                    board.board[i][j] = False

                # find out how many squares are already filled
                total_score = max_score - len(board.get_potential_moves())

                # find the best move; it is the same regardless of the current
                # score, so we evaluate its Q-value at total_score
                potential_moves = []

                for i, row in enumerate(board.board):
                    for j, val in enumerate(row):
                        if not val:
                            gain = board.check_surrounding_squares((i, j), 3)
                            # each move records the edge inserted and the score gain
                            potential_moves.append(((i, j), gain))

                # map potential_moves on Q-values
                qmax = -np.inf
                board_num = qlearning.board2num(board.board)
                for move in potential_moves:
                    edge, gain = move
                    edge_num = qlearning.edge2num(edge)
                    if gain:
                        # A scoring move keeps the turn, so the value is not negated.
                        qval = gamma * Q[m - 1][total_score + gain][str(board_num + edge_num)]
                    else:
                        # A non-scoring move passes the turn, so the value is negated.
                        qval = -gamma * Q[m - 1][0][str(board_num + edge_num)]

                    if qmax < qval:
                        qmax = qval
                        best_edge = edge

                # insert Q-values for every possible score state
                for score_state in range(total_score + 1):
                    Q[m][score_state][str(board_num)] = qmax - RW * (total_score - score_state)
                # insert best move
                Qedge[m][str(board_num)] = best_edge

                # restore the edges removed for this board state
                for edge in board_state:
                    i, j = edge
                    board.board[i][j] = True
    return (Q, Qedge)
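A table built this way could be consulted at play time by encoding the current board with the same qlearning.board2num encoder and reading back the stored best edge. The lookup below is a sketch under that assumption (the function name and the None fallback are invented for illustration):

def lookup_best_edge(Qedge, board, free_lines):
    # Encode the board the same way train() did; Qedge is indexed by the number
    # of free (unfilled) lines and keyed by the board encoding.
    board_num = str(qlearning.board2num(board.board))
    if free_lines < len(Qedge) and board_num in Qedge[free_lines]:
        return Qedge[free_lines][board_num]
    return None  # position not covered by the table; fall back to a heuristic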