コード例 #1
0
                              -100,
                              100,
                              board_nextMoves,
                              force_best_move=False)
            done = False
            board.reset()
            sem_game.MAX_MOVES = i
            print("here")
            t0 = time.clock()
            while not done:
                if board.turn == 1:
                    minimax_move = minimax.run_search(board, 1)
                    move = (minimax_move[0], minimax_move[1])
                    board.make_move(move)

                    if board.check_win() != -1:
                        done = True
                        print("Win 1")
                        board.showBoard()
                else:
                    minimax_move = minimax.run_search(board, -1)
                    move = (minimax_move[0], minimax_move[1])
                    board.make_move(move)

                    if board.check_win() != -1:
                        done = True
                        print("Win 2")
                        board.showBoard()

                board.turn *= -1
コード例 #2
0
class SemEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, _type='DQN', _minimax_rate=0):
        """Set up the spaces, the players and the board, then reset the game.

        Args:
            _type: Selects which branch of ``step`` is executed. ``step``
                recognises 'Monte Carlo', 'Monte Carlo Test', 'Q-learning',
                'DQN' and 'DQN_test'.
            _minimax_rate: Probability with which the opponent's reply is
                chosen by the minimax player rather than the random player
                (read by the 'Monte Carlo Test', 'DQN' and 'DQN_test' steps).
        """
        self._type = _type
        self._minimax_rate = _minimax_rate

        self.action_space = spaces.Discrete(BOARD_ROWS * BOARD_COLS)
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(BOARD_ROWS, BOARD_COLS,
                                                   MAX_MOVES),
                                            dtype=np.uint8)

        # Define the attribute up front so it always exists, even when the
        # minimax data cannot be loaded.
        self.minimax_player = None
        minimax_name = "board_nextMoves_{}_{}x{}_MMPS".format(
            MAX_MOVES, BOARD_ROWS, BOARD_COLS)
        try:
            self.minimax_player = Player(_name=minimax_name,
                                         _player_type="Minimax")
        except Exception:
            # Best effort: the environment stays usable with the random
            # opponent only. `Exception` (instead of a bare `except`) lets
            # KeyboardInterrupt / SystemExit propagate.
            print("Minimax agent not loaded")
        self.rand = Player()
        self.agent_turn = 1

        self.board = Board()
        self.done = False
        self.padding = False
        self.minimax_test = False

        self.states_lst = []
        self.counter = 0

        self.reset()

    def step(self, action=-1):
        """Advance the game by one environment step.

        What happens depends on ``self._type``:

        * 'Monte Carlo': a single move is played (``action``, or a bot move
          when ``action`` is -1) and ``self.board.turn`` is flipped if the
          game goes on.
        * 'Monte Carlo Test': the agent move (random when ``action`` is -1)
          is followed by the opponent's reply; every resulting board state
          is appended to ``self.states_lst``.
        * 'Q-learning': ``action`` is handed to the board unchanged, then
          the opponent replies (minimax with probability 0.5).
        * 'DQN' / 'DQN_test': ``action`` is decoded as a flat cell index,
          then the opponent replies (minimax with probability
          ``self._minimax_rate``).

        Args:
            action: Flat cell index or board position, depending on the mode
                described above. -1 means "no action given" in the two
                Monte Carlo modes.

        Returns:
            Tuple ``(observation, reward, done, info)``. The observation is
            ``self.board.getHash()`` for the Monte Carlo and Q-learning modes
            and ``self.board.get_one_hot(self.padding)`` for the DQN modes;
            ``info`` is always an empty dict.

        Raises:
            ValueError: If ``self._type`` is not one of the modes above.
        """
        mode = self._type
        if mode == 'Monte Carlo':
            return self._step_monte_carlo(action)
        if mode == 'Monte Carlo Test':
            return self._step_monte_carlo_test(action)
        if mode == "Q-learning":
            return self._play_round(action,
                                    self.board.getHash,
                                    minimax_rate=0.5,
                                    check_legal=True,
                                    record_states=False)
        if mode in ("DQN", "DQN_test"):
            return self._play_round(self._flat_to_position(action),
                                    self._one_hot_observation,
                                    minimax_rate=self._minimax_rate,
                                    check_legal=True,
                                    record_states=False)
        raise ValueError("Unsupported environment type: {!r}".format(mode))

    def _flat_to_position(self, action):
        """Decode a flat cell index into a ``(row, col)`` tuple."""
        return (int(action / BOARD_COLS), int(action % BOARD_COLS))

    def _to_position(self, action):
        """Return ``action`` as a position, decoding integer cell indices."""
        # numpy integers (e.g. from np.argmax or action_space.sample()) are
        # flat indices too, not ready-made positions.
        if isinstance(action, (int, np.integer)):
            return self._flat_to_position(action)
        return action

    def _one_hot_observation(self):
        """Return the one-hot board observation used by the DQN modes."""
        return self.board.get_one_hot(self.padding)

    def _end_episode(self, observe, reward):
        """Mark the episode as finished and build the terminal step result."""
        self.done = True
        return observe(), reward, self.done, {}

    def _opponent_reply(self, minimax_rate):
        """Play the opponent's move: minimax with the given probability."""
        if np.random.rand() < minimax_rate:
            opponent = self.minimax_player
        else:
            opponent = self.rand
        bot_move = opponent.choose_action(self.board, player=-self.agent_turn)
        self.board.make_move((bot_move[0], bot_move[1]))

    def _play_round(self, move, observe, minimax_rate, check_legal,
                    record_states):
        """Play the agent's move followed by the opponent's reply.

        Rewards: -2 when ``check_legal`` is set and ``make_move`` returns 0,
        1 when ``check_win()`` is not -1 after the agent's move, -1 when it
        is not -1 after the opponent's reply, 0 otherwise.
        """
        move_done = self.board.make_move(move)
        if check_legal and move_done == 0:
            return self._end_episode(observe, -2)
        if record_states:
            self.states_lst.append(self.board.state)
        if self.board.check_win() != -1:
            return self._end_episode(observe, 1)

        self._opponent_reply(minimax_rate)
        if record_states:
            self.states_lst.append(self.board.state)
        if self.board.check_win() != -1:
            return self._end_episode(observe, -1)

        return observe(), 0, self.done, {}

    def _step_monte_carlo(self, action):
        """Play one single move for the 'Monte Carlo' mode."""
        if action != -1:
            self.board.make_move(self._to_position(action))
            if self.board.check_win() >= 0:
                return self._end_episode(self.board.getHash, -1)
        else:
            if self.minimax_test:
                bot_move = self.minimax_player.choose_action(
                    self.board, player=-self.agent_turn)
            else:
                bot_move = self.rand.choose_action(self.board,
                                                   player=self.board.turn)
            self.board.make_move((bot_move[0], bot_move[1]))
            if self.board.check_win() >= 0:
                # The reward is the value of board.turn at the end of the game.
                return self._end_episode(self.board.getHash, self.board.turn)

        self.board.turn *= -1
        return self.board.getHash(), 0, self.done, {}

    def _step_monte_carlo_test(self, action):
        """Play agent move plus opponent reply for 'Monte Carlo Test'."""
        if action != -1:  # Action specified at step request
            move = self._to_position(action)
        else:  # Action not specified, random action taken
            bot_move = self.rand.choose_action(self.board,
                                               player=self.board.turn)
            move = (bot_move[0], bot_move[1])
        return self._play_round(move,
                                self.board.getHash,
                                minimax_rate=self._minimax_rate,
                                check_legal=False,
                                record_states=True)

    def reset(self):
        """Start a new game and return the initial one-hot observation.

        The board, the ``done`` flag and ``states_lst`` are cleared. When the
        agent plays second (``agent_turn == -1``) the opponent's opening move
        is played here and ``board.turn`` is set to -1.

        Returns:
            ``self.board.get_one_hot(self.padding)`` for the fresh position.
        """
        self.board.reset()
        self.done = False
        self.states_lst = []

        if self.agent_turn == -1:
            # NOTE: np.random.rand() samples from [0, 1), so the `< 0` test
            # never passes and get_init_board() is currently never used;
            # the condition is kept as written rather than re-enabled.
            if (np.random.rand() < 0 and self._type == "DQN"
                    and self.counter < 0):
                self.get_init_board()
                self.counter += 1
            else:
                bot_move = self.rand.choose_action(self.board,
                                                   player=-self.agent_turn)
                self.board.make_move((bot_move[0], bot_move[1]))

            self.board.turn = -1

        return self.board.get_one_hot(self.padding)

    def render(self, mode='human'):
        """Show the current board via ``Board.showBoard``.

        Args:
            mode: Accepted so that callers using the ``render(mode=...)``
                form of the gym API work; the value is not inspected
                ('human' is the only mode listed in ``metadata``).
        """
        self.board.showBoard()

    def possible_move_boards(self):
        """Return ``(board_hash, move)`` for every currently available move.

        Each available position is played, the resulting board hash is
        recorded, and the move is undone again before the next one is tried.
        """
        successors = []
        for move in self.board.availablePositions():
            self.board.make_move(move)
            successors.append((self.board.getHash(), move))
            self.board.undo_move(move)
        return successors

    def set_state(self, state):
        """Forward ``state`` to ``Board.set_state`` and return its result."""
        board = self.board
        return board.set_state(state)

    def get_symmetry(self, board_hash=-1):
        """Forward to ``Board.get_symmetry`` and return its result."""
        board = self.board
        return board.get_symmetry(board_hash)

    def get_canonic_score(self, board_flat=-1):
        """Forward to ``Board.get_canonic_score`` and return its result."""
        board = self.board
        return board.get_canonic_score(board_flat)

    # Always returns the same state for any group of symmetric states, plus a list of all those symmetric states
    def get_canonic_state(self, board_hash=-1):
        """Pick the symmetric variant with the highest canonic score.

        Returns:
            Tuple ``(canonic_board, all_symmetric_boards)``; on ties the
            first variant with the maximum score is chosen.
        """
        symmetric_boards = self.get_symmetry(board_hash)
        scores = list(map(self.get_canonic_score, symmetric_boards))
        best_score = max(scores)
        return symmetric_boards[scores.index(best_score)], symmetric_boards

    def get_canonic_state_mask(self, board_flat=-1):
        """Forward to ``Board.get_canonic_state_mask`` and return its result."""
        board = self.board
        return board.get_canonic_state_mask(board_flat)

    def one_hot_encode(self, state):
        """One-hot encode a board into ``MAX_MOVES`` planes.

        Plane ``m`` holds a 1 wherever the cell value equals ``m + 1``; cells
        holding any other value (including 0) stay 0 in every plane.

        Args:
            state: 2-D array-like indexed as ``state[row, col]``; only the
                top-left ``BOARD_ROWS x BOARD_COLS`` region is read.

        Returns:
            Float array of shape ``(MAX_MOVES, BOARD_ROWS, BOARD_COLS)``.
        """
        cells = np.asarray(state)[:BOARD_ROWS, :BOARD_COLS]
        one_hot_board = np.zeros((MAX_MOVES, BOARD_ROWS, BOARD_COLS))
        # Only MAX_MOVES planes exist, so the plane index must stop at
        # MAX_MOVES - 1 (the previous bound of MAX_MOVES + 1 could index one
        # plane past the end).
        for m in range(MAX_MOVES):
            one_hot_board[m] = cells == m + 1
        return one_hot_board

    def pad(self, state):
        """Zero-pad ``state``, print the padded array and reshape it.

        The first axis is left untouched, the second gets 31 leading and 30
        trailing zeros, the third gets 30 zeros on each side. The result is
        reshaped to ``(-1, BOARD_ROWS, BOARD_COLS, MAX_MOVES)``.
        """
        pad_widths = ((0, 0), (31, 30), (30, 30))
        padded_state = np.pad(state,
                              pad_widths,
                              mode='constant',
                              constant_values=0)
        print(padded_state)
        target_shape = (-1, BOARD_ROWS, BOARD_COLS, MAX_MOVES)
        return padded_state.reshape(target_shape)

    def reshape_cnn(self, state):
        """Reshape ``state`` to ``(-1, BOARD_ROWS, BOARD_COLS, MAX_MOVES)``."""
        cnn_shape = (-1, BOARD_ROWS, BOARD_COLS, MAX_MOVES)
        return state.reshape(cnn_shape)

    def get_init_board(self):
        """Self-play until a game ends on a ``turn == -1`` move, then step back.

        Moves are chosen by the minimax player with probability 0.8 and by
        the random player otherwise. When a game ends on a ``turn == 1``
        move the board is reset and play starts over. When it ends on a
        ``turn == -1`` move, the last ``steps_back`` moves are undone and the
        resulting position is returned.

        Returns:
            ``self.board.get_one_hot(self.padding)`` for the final position.
        """
        turn = 1
        moves_lst = []
        while True:
            if np.random.rand() < 0.8:
                bot_move = self.minimax_player.choose_action(self.board,
                                                             player=turn)
            else:
                bot_move = self.rand.choose_action(self.board, player=turn)
            self.board.make_move((bot_move[0], bot_move[1]))
            moves_lst.append(bot_move)

            if self.board.check_win() != -1:
                if turn == -1:
                    # Currently always 1; the draw is kept so the random
                    # stream is consumed exactly as before.
                    steps_back = random.choice([1])
                    for i in range(steps_back):
                        # Undo the most recent moves, newest first.
                        self.board.undo_move(moves_lst[-1 - i])
                    break

                self.board.reset()
                moves_lst = []
                # NOTE(review): the flip below turns this into -1, so the
                # restarted game opens with player=-1 - confirm this is
                # intended.
                turn = 1

            turn *= -1

        return self.board.get_one_hot(self.padding)
コード例 #3
0
class VisualGame(tk.Frame):
    def __init__(self, parent):
        """Build the board canvas, the controls and the initial game state.

        Args:
            parent: Tk widget hosting this frame. It is also used as the
                master of the reset button, the options frame and the Tk
                variables, and for scheduling bot moves with ``after``.
        """
        # tk.Frame.__init__ returns None, so `self.frame` is always None; the
        # assignment is kept only so that the attribute keeps existing.
        self.frame = tk.Frame.__init__(self, parent)
        self.root = parent

        self.canvas = tk.Canvas(self, bg="grey")
        # Vertical grid lines, then horizontal ones, then the outer border.
        for x in (100, 200, 300):
            self.canvas.create_line(x, 5, x, 300, width=3)
        for y in (100, 200):
            self.canvas.create_line(5, y, 400, y, width=3)
        self.canvas.create_rectangle(5, 5, 400, 300, width=5)

        self.canvas.pack(fill="both", expand=1)
        self.canvas.bind("<Button-1>", self.makeMove)

        self.reset_b = tk.Button(self.root, text="Reset", command=self.reset)
        self.reset_b.pack(side="bottom")

        self.optionsFrame = tk.Frame(self.root)
        self.optionsFrame.pack(side="bottom")

        # Use the widget we were given instead of relying on a module-level
        # global named `root` being defined by the caller.
        self.alg_choice = tk.StringVar(self.root)
        self.alg_choices = {'Minimax', 'Monte Carlo', 'Q-learning', 'DQN'}
        self.alg_choice.set('Q-learning')  # set the default option

        # Sets have no stable iteration order; sort so the menu entries
        # appear in the same order on every run.
        popupMenu = tk.OptionMenu(self.optionsFrame, self.alg_choice,
                                  *sorted(self.alg_choices))
        popupMenu.pack(side="right")

        self.player_choice = tk.StringVar(self.root)
        self.player_choices = {'Player 1', 'Player 2'}
        self.player_choice.set('Player 1')  # set the default option

        popupMenu1 = tk.OptionMenu(self.optionsFrame, self.player_choice,
                                   *sorted(self.player_choices))
        popupMenu1.pack(side="left", padx=15, pady=0)

        self.winner_txt = tk.Label(self.canvas, height=1, width=50, bg="grey")
        self.winner_txt.config(font=("Arial", 15))
        self.winner_txt.pack(side="bottom", padx=0, pady=0)

        self.board = Board()
        self.bot_turn = -1
        self.bot_type = "Q-learning"

        self.update_bot_type()
        self.reset()

    def makeMove(self, event):
        print(self.bot_type)
        if self.check_win() == -1 and self.bot_type != None:
            y = int(event.x / 100)
            x = int(event.y / 100)
            print(x, y)
            moveMade = self.board.make_move((x, y))
            print(moveMade)
            if moveMade == 1:
                self.update_visual()
                print(self.check_win(return_line=True))
                if self.board.check_win() == -1:
                    self.root.after(np.random.randint(500, 1000),
                                    self.bot_move)

    def bot_move(self):
        print("-----------")
        print(self.p2._name)
        print("-----------")
        positions = self.board.availablePositions()
        action = self.p2.choose_action(self.board, player=self.bot_turn)
        print("here " + str(self.bot_turn))
        #print("here" + str(action))
        moveMade = self.board.make_move(action)
        print(moveMade)
        if moveMade == 1:
            self.update_visual()
            print(self.check_win(return_line=True))

    def update_visual(self):
        """Draw a filled circle on every cell whose value is 1, 2 or 3.

        Value 1 is drawn green, 2 yellow and 3 red; each circle is tagged
        "move". Cells holding any other value are skipped.
        """
        fills = ((1, "green"), (2, "yellow"), (3, "red"))
        for row in range(BOARD_ROWS):
            top = 10 + 100 * row
            for col in range(BOARD_COLS):
                cell = self.board.state[row, col]
                for value, fill in fills:
                    if cell == value:
                        left = 10 + 100 * col
                        # Circle of diameter 80 inside the 100-pixel cell.
                        self.canvas.create_oval(left,
                                                top,
                                                left + 80,
                                                top + 80,
                                                fill=fill,
                                                width=3,
                                                tags="move")
                        break

    def update_bot_type(self):
        """(Re)load ``self.p2`` for the current ``bot_type`` and ``bot_turn``.

        'DQN' and 'Q-learning' bots use the "policy2_sem..." data when the
        bot moves second (``bot_turn == -1``) and "policy1_sem..." otherwise;
        'Minimax' and 'Monte Carlo' each use a single data set. Any other
        ``bot_type`` leaves ``self.p2`` untouched. Loading errors are
        reported on stdout and otherwise ignored.
        """
        try:
            bot_type = self.bot_type
            size_tag = "{}_{}x{}".format(MAX_MOVES, BOARD_ROWS, BOARD_COLS)
            if bot_type in ("DQN", "Q-learning"):
                prefix = ("policy2_sem"
                          if self.bot_turn == -1 else "policy1_sem")
                name = prefix + size_tag
            elif bot_type == "Minimax":
                name = "board_nextMoves_" + size_tag + "_MMPSC"
            elif bot_type == "Monte Carlo":
                name = "policy_sem" + size_tag + "_SCM"
            else:
                return
            self.p2 = Player(_name=name, _player_type=bot_type)
        except Exception:
            # `Exception` rather than a bare `except`, so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("Error trying to load bot's data")

    def check_win(self, return_line=False):
        if return_line:
            win_flag, win_line = self.board.check_win(return_line=True)
            if win_flag != -1:
                if self.bot_turn == 1:
                    if self.board.movesMade % 2 == 0:
                        txt = "You Won!"
                    else:
                        txt = "You Lost..."
                else:
                    if self.board.movesMade % 2 != 0:
                        txt = "You Won!"
                    else:
                        txt = "You Lost..."
                self.winner_txt["text"] = txt

                line_dir = (win_line[2][0] - win_line[0][0],
                            win_line[2][1] - win_line[0][1])
                if line_dir[0] == 0:
                    self.canvas.create_line(30 + 100 * win_line[0][1],
                                            50 + 100 * win_line[0][0],
                                            70 + 100 * win_line[2][1],
                                            50 + 100 * win_line[2][0],
                                            width=12,
                                            tags="win_line")
                elif line_dir[1] == 0:
                    self.canvas.create_line(50 + 100 * win_line[0][1],
                                            30 + 100 * win_line[0][0],
                                            50 + 100 * win_line[2][1],
                                            70 + 100 * win_line[2][0],
                                            width=12,
                                            tags="win_line")
                elif line_dir[0] > 0 and line_dir[1] > 0:
                    self.canvas.create_line(40 + 100 * win_line[0][1],
                                            40 + 100 * win_line[0][0],
                                            60 + 100 * win_line[2][1],
                                            60 + 100 * win_line[2][0],
                                            width=12,
                                            tags="win_line")
                else:
                    self.canvas.create_line(60 + 100 * win_line[0][1],
                                            40 + 100 * win_line[0][0],
                                            40 + 100 * win_line[2][1],
                                            60 + 100 * win_line[2][0],
                                            width=12,
                                            tags="win_line")
        else:
            win_flag = self.board.check_win(return_line=False)
            if win_flag != -1:
                if self.board.movesMade % 2 == 0:
                    txt = "You Won!"
                else:
                    txt = "You Lost..."
                self.winner_txt["text"] = txt

        return win_flag

    def reset(self):
        self.board.reset()
        self.canvas.delete("move")
        self.canvas.delete("win_line")
        self.winner_txt["text"] = ""

        if self.bot_type != self.alg_choice.get():
            self.bot_type = self.alg_choice.get()
            self.update_bot_type()
        if self.bot_turn == 1 and self.player_choice.get() == "Player 1":
            self.bot_turn = -1
            self.update_bot_type()
        if self.bot_turn == -1 and self.player_choice.get() == "Player 2":
            self.bot_turn = 1
            self.update_bot_type()

        # if self.player_choice.get() == "Player 1":
        #     self.bot_turn = -1
        # else:
        #     self.bot_turn = 1

        if self.bot_turn == 1:
            self.bot_move()

    def change_dropdown(self, *args):
        self.bot_type = self.tkvar.get()
        self.update_bot_type()
        print(self.tkvar.get())