def evaluate(AIs, play_num, return_draw=False):
    """Play ``play_num`` games between ``AIs[0]`` and ``AIs[1]`` and score them.

    The opening move alternates: in even-numbered games ``AIs[0]`` moves
    first and gets ``color`` 0, in odd-numbered games ``AIs[1]`` does.  A game
    adds 1 to the score when ``state.reward`` is 1 and ``AIs[0]`` moved first,
    or -1 and ``AIs[0]`` moved second; a reward of 0 adds 0.5 and is counted
    as a draw.  (Presumably the reward is expressed from the first mover's
    point of view, which would make the score ``AIs[0]``'s -- confirm against
    ``State``.)

    After every game a one-line tally is rewritten on stderr showing
    ``games_played - score`` followed by the number of games played.  When
    all games are done the agents' colors are reset to 0 and 1.

    Args:
        AIs: Two agents exposing ``init_prev()``, ``act_and_get_pi(state)``
            and writable ``color`` / ``prev_action`` attributes.
        play_num: Number of games to play.
        return_draw: When true, also return the number of drawn games.

    Returns:
        The accumulated score as a float, or ``(score, draws)`` when
        ``return_draw`` is true.
    """
    score = 0.
    draws = 0

    for game in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()

        opener = game % 2
        AIs[opener].color = 0
        AIs[1 - opener].color = 1

        mover = opener
        while True:
            other = 1 - mover
            action_id, _ = AIs[mover].act_and_get_pi(state)
            # Keep asking the same agent until the state accepts its move.
            while not state.accept_action_str(actionid2str(state, action_id)):
                print("this action is impossible")
                action_id, _ = AIs[mover].act_and_get_pi(state)
            AIs[other].prev_action = action_id

            if state.terminate:
                break
            mover = other

        reward = state.reward
        if reward == 0:
            score += 0.5
            draws += 1
        elif (opener == 0 and reward == 1) or (opener == 1 and reward == -1):
            score += 1.

        played = game + 1
        sys.stderr.write('\r\033[K {}win/{}'.format(played - score, played))
        sys.stderr.flush()

    print("")
    AIs[0].color = 0
    AIs[1].color = 1

    if return_draw:
        return score, draws
    return score
def normal_play(agents):
    """Run one game between ``agents[0]`` and ``agents[1]`` on the console.

    The board is displayed before every move.  Each agent's ``act`` may
    return either an ``int`` action id (converted with ``actionid2str``) or
    an action string.  A rejected move is echoed and the same agent is asked
    again.  The wall-clock time of ``agents[0]``'s first request each turn
    is printed.  When the state reports termination the final board and the
    reward pair are printed.

    Args:
        agents: Two agents exposing ``act(state, showNQ=True)`` and a
            writable ``prev_action`` attribute.
    """
    state = State()

    def _as_text(choice):
        # Integer action ids need translating; anything else is used as-is.
        if isinstance(choice, int):
            return actionid2str(state, choice)
        return choice

    def _settle(agent, choice):
        # Re-query ``agent`` until the state accepts a move; return that move.
        text = _as_text(choice)
        while not state.accept_action_str(text):
            print(text)
            print("this action is impossible")
            choice = agent.act(state, showNQ=True)
            text = _as_text(choice)
        return choice

    while True:
        state.display_cui()
        began = time.time()
        choice = agents[0].act(state, showNQ=True)
        finished = time.time()
        print(finished - began)
        agents[1].prev_action = _settle(agents[0], choice)
        if state.terminate:
            break

        state.display_cui()
        choice = agents[1].act(state, showNQ=True)
        agents[0].prev_action = _settle(agents[1], choice)
        if state.terminate:
            break

    state.display_cui()
    print("The game finished. reward=({}, {})".format(state.reward, -state.reward))
class Quoridor(Widget):
    """Kivy widget that draws a Quoridor board and runs a two-player game.

    The widget owns the game ``State`` and a pair of agents (``GUIHuman`` or
    ``CNNAI``).  ``update`` advances the game by at most one move per call
    and refreshes the Kivy properties and wall colors; ``on_touch_down``
    converts a click into an action string stored in the module-level
    ``action`` global and raises the module-level ``touched`` flag
    (presumably consumed by ``GUIHuman.act`` -- confirm).
    """

    # Kivy properties refreshed from the game state in ``update``.
    turn = NumericProperty(0)
    move_str = StringProperty("player1 move")
    player1wall = NumericProperty(10)
    player2wall = NumericProperty(10)
    Bx = NumericProperty(0)
    By = NumericProperty(0)
    Wx = NumericProperty(0)
    Wy = NumericProperty(0)

    # Button widgets; __init__ expects them to be set before it binds to them
    # (presumably injected by the kv layout -- confirm).
    button = ObjectProperty(None)
    human_button1 = ObjectProperty(None)
    ai_button1 = ObjectProperty(None)
    human_button2 = ObjectProperty(None)
    ai_button2 = ObjectProperty(None)

    # Passed to every CNNAI as ``search_nodes``.
    search_nodes = 800
    # Checkpoint loaded into every CNNAI created by ``start_game``.
    checkpoint_path = "./parameter/epoch110.ckpt"

    # ------------------------------------------------------------------
    # Pixel geometry helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _board_px(offset, cells):
        """Return ``offset`` plus ``cells`` ninths of the grid span, as int."""
        return int(offset + cells / 9 * (BOARD_LEN - 30))

    @classmethod
    def _hits_crossing(cls, px, py, x, y):
        """True if (px, py) lies on the 10px square where wall slots cross."""
        right = cls._board_px(20, x + 1)
        bottom = cls._board_px(10, y + 1)
        return right - 10 <= px <= right and bottom <= py <= bottom + 10

    @classmethod
    def _hits_row_wall(cls, px, py, x, y):
        """True if (px, py) lies on the horizontal wall slot (x, y)."""
        bottom = cls._board_px(10, y + 1)
        return (cls._board_px(20, x + 0.5) <= px < cls._board_px(20, x + 1.5)
                and bottom <= py <= bottom + 10)

    @classmethod
    def _hits_column_wall(cls, px, py, x, y):
        """True if (px, py) lies on the vertical wall slot (x, y)."""
        right = cls._board_px(20, x + 1)
        return (right - 10 <= px <= right
                and cls._board_px(10, y + 0.5) <= py < cls._board_px(10, y + 1.5))

    @classmethod
    def _hits_cell(cls, px, py, x, y):
        """True if (px, py) lies inside board cell (x, y)."""
        return (cls._board_px(20, x) <= px < cls._board_px(10, x + 1)
                and cls._board_px(20, y) <= py < cls._board_px(10, y + 1))

    # ------------------------------------------------------------------
    # Widget set-up
    # ------------------------------------------------------------------
    def dont_down(self, button):
        """Put ``button`` back into the "down" state if it is not in it."""
        if button.state != "down":
            button.state = "down"

    def __init__(self, **kwargs):
        """Create the initial state and agents, bind buttons, draw the board."""
        super(Quoridor, self).__init__(**kwargs)
        self.state = State()
        self.agents = [GUIHuman(0), CNNAI(1, search_nodes=self.search_nodes, tau=0.5)]
        self.playing_game = False

        # Pressing a selector button must never leave it released.
        self.human_button1.bind(on_press=lambda touch: self.dont_down(self.human_button1))
        self.ai_button1.bind(on_press=lambda touch: self.dont_down(self.ai_button1))
        self.human_button2.bind(on_press=lambda touch: self.dont_down(self.human_button2))
        self.ai_button2.bind(on_press=lambda touch: self.dont_down(self.ai_button2))
        self.button.bind(on_release=lambda touch: self.start_game())

        # One Color per wall slot, created fully transparent; ``update``
        # changes only the alpha channel.
        self.row_wall_colors = [Color(0.7, 0.7, 0, 0) for _ in range(64)]
        self.column_wall_colors = [Color(0.7, 0.7, 0, 0) for _ in range(64)]

        # Board background and the 10 + 10 grid bars.
        with self.canvas.before:
            Color(96/255, 32/128, 0, 1)
            Rectangle(pos=(10, 10), size=(BOARD_LEN - 20, BOARD_LEN - 20))
            Color(64/255, 0, 0, 1)
            for i in range(10):
                Rectangle(pos=(self._board_px(10, i), 10), size=(10, BOARD_LEN - 20))
            for i in range(10):
                Rectangle(pos=(10, self._board_px(10, i)), size=(BOARD_LEN - 20, 10))

        # NOTE(review): the wall rectangles are added to the main canvas
        # explicitly, outside the ``canvas.before`` context -- confirm this
        # matches the intended layering.
        wall_length = (BOARD_LEN - 30) // 9 * 2 - 10
        for i, color in enumerate(self.row_wall_colors):
            x, y = i % 8, i // 8
            self.canvas.add(color)
            self.canvas.add(Rectangle(
                pos=(self._board_px(20, x), self._board_px(10, y + 1)),
                size=(wall_length, 10)))
        for i, color in enumerate(self.column_wall_colors):
            x, y = i % 8, i // 8
            self.canvas.add(color)
            self.canvas.add(Rectangle(
                pos=(self._board_px(10, x + 1), self._board_px(20, y)),
                size=(10, wall_length)))

    # ------------------------------------------------------------------
    # Game flow
    # ------------------------------------------------------------------
    def oneturn(self, color):
        """Ask agent ``color`` for a move and apply it to the state.

        For a ``CNNAI`` agent the search tree is rendered to
        ``game_trees/game_tree<turn>`` on every call.  Nothing else happens
        when the agent returns -1 or when the state rejects the move.
        """
        global touched
        agent = self.agents[color]
        s = agent.act(self.state)
        if isinstance(agent, CNNAI):
            g = agent.get_tree_for_graphviz()
            g.render(os.path.join("game_trees", "game_tree{}".format(self.state.turn)))
        if s == -1:
            return

        a = actionid2str(self.state, s) if isinstance(s, int) else s
        if not self.state.accept_action_str(a):
            print(a)
            print("this action is impossible")
            return

        self.agents[1 - color].prev_action = s
        self.state.display_cui()
        print(self.state.get_player_dist_from_goal())
        touched = False

    def _make_agent(self, color, human_button, ai_button):
        """Build the agent selected for seat ``color``, or None if none is."""
        if human_button.state == "down":
            return GUIHuman(color)
        if ai_button.state == "down":
            agent = CNNAI(color, search_nodes=self.search_nodes, tau=0.25)
            agent.load(self.checkpoint_path)
            return agent
        return None

    def start_game(self):
        """Create both agents from the selector buttons and reset the game.

        If either seat has neither its human nor its AI button down, the
        request is ignored with a console message instead of failing on an
        unbound agent variable.
        """
        global touched
        agent1 = self._make_agent(0, self.human_button1, self.ai_button1)
        agent2 = self._make_agent(1, self.human_button2, self.ai_button2)
        if agent1 is None or agent2 is None:
            print("select human or AI for both players before starting")
            return

        self.agents = [agent1, agent2]
        self.state = State()
        self.playing_game = True
        touched = False

    def update(self, dt):
        """Advance the game by at most one move and refresh the display.

        Args:
            dt: Unused by this method (presumably the elapsed time supplied
                by the Kivy clock -- confirm at the call site).
        """
        if self.playing_game and not self.state.terminate:
            self.oneturn(self.state.turn % 2)
        if self.state.terminate:
            self.playing_game = False

        self.turn = self.state.turn
        self.move_str = "player1 move" if self.turn % 2 == 0 else "player2 move"
        self.player1wall = self.state.black_walls
        self.player2wall = self.state.white_walls

        # State rows are mirrored vertically (8.5 - y) when mapped to pixels.
        self.Bx = self._board_px(15, self.state.Bx + 0.5)
        self.By = self._board_px(15, 8.5 - self.state.By)
        self.Wx = self._board_px(15, self.state.Wx + 0.5)
        self.Wy = self._board_px(15, 8.5 - self.state.Wy)

        # 1) Hide every wall slot.
        for color in self.row_wall_colors:
            color.a = 0
        for color in self.column_wall_colors:
            color.a = 0

        # 2) Half-highlight the slot under the mouse, except on a crossing.
        mouse_x, mouse_y = Window.mouse_pos
        for x in range(8):
            for y in range(8):
                slot = y * 8 + x
                if self._hits_row_wall(mouse_x, mouse_y, x, y):
                    self.row_wall_colors[slot].a = 0.5
                if self._hits_column_wall(mouse_x, mouse_y, x, y):
                    self.column_wall_colors[slot].a = 0.5
                if self._hits_crossing(mouse_x, mouse_y, x, y):
                    self.row_wall_colors[slot].a = 0
                    self.column_wall_colors[slot].a = 0

        # 3) Walls present in the state are drawn fully opaque.
        for x in range(8):
            for y in range(8):
                slot = (7 - y) * 8 + x
                if self.state.row_wall[x, y]:
                    self.row_wall_colors[slot].a = 1
                if self.state.column_wall[x, y]:
                    self.column_wall_colors[slot].a = 1

    def on_touch_down(self, touch):
        """Translate a click into an action string and flag the touch.

        Sets the global ``touched`` to True on every touch.  The global
        ``action`` is assigned only when the touch lands on a wall slot
        (``<column letter><8 - y>`` plus "h" or "v") or on a cell
        (``<column letter><9 - y>``); touches on a wall crossing are ignored.
        The touch is then forwarded to the base class.
        """
        global touched, action
        touched = True
        for x in range(9):
            for y in range(9):
                if self._hits_crossing(touch.x, touch.y, x, y):
                    continue
                column = chr(ord("a") + x)
                if self._hits_row_wall(touch.x, touch.y, x, y):
                    action = column + str(8 - y) + "h"
                if self._hits_column_wall(touch.x, touch.y, x, y):
                    action = column + str(8 - y) + "v"
                if self._hits_cell(touch.x, touch.y, x, y):
                    action = column + str(9 - y)
        super(Quoridor, self).on_touch_down(touch)
def generate_data(AIs, play_num, noise=0.1, display=False, equal_draw=True):
    """Generate training samples from ``play_num`` self-play games.

    ``AIs[0]`` always moves first.  Before each pair of moves both agents get
    a random ``tau`` in ``[TAU_MIN, 1)``, or exactly ``TAU_MIN`` once
    ``state.turn`` has reached 20.  A game stops when the state terminates
    or, if ``equal_draw`` is true, when a position compares equal to one
    already recorded in that game.  Games whose ``state.reward`` is 0 yield
    no samples.

    Every recorded position yields four samples, one per
    ``state.feature_CNN(b1, b2)`` variant.  The policy vector is rearranged
    to match: the two 8x8 planes (``pi[:64]`` and ``pi[64:128]``) are flipped
    along axis 0 when ``b1`` is set and along axis 1 when ``b2`` is set, and
    the trailing 3x3 block is re-indexed with negated indices on the same
    axes.  Samples with ``b2`` set carry the negated reward.

    Args:
        AIs: Two agents exposing ``init_prev()``,
            ``act_and_get_pi(state, noise=...)`` and writable ``tau`` /
            ``prev_action`` attributes.
        play_num: Number of games to play.
        noise: Forwarded to the first ``act_and_get_pi`` call of each move
            (retries after a rejected move are made without it).
        display: When true, print the board after every move.
        equal_draw: When true, stop a game on a repeated position.

    Returns:
        A list of ``(feature, pi, reward)`` tuples.
    """
    orientations = ((False, False), (True, False), (False, True), (True, True))

    def _reoriented(pi, flip0, flip1):
        # Build the policy vector matching a flipped feature variant.
        boards = []
        for plane in (pi[:64], pi[64:128]):
            grid = plane.reshape((8, 8))
            if flip1:
                grid = np.flip(grid, 1)
            if flip0:
                grid = np.flip(grid, 0)
            boards.append(grid.flatten())

        moves = pi[128:].reshape((3, 3))
        remapped = np.zeros((3, 3))
        sign0 = -1 if flip0 else 1
        sign1 = -1 if flip1 else 1
        # Indices -1/0/1 are used directly, so -1 addresses the last entry.
        for col in (-1, 0, 1):
            for row in (-1, 0, 1):
                remapped[row, col] = moves[sign0 * row, sign1 * col]
        return np.concatenate(boards + [remapped.flatten()])

    samples = []
    for _ in range(play_num):
        state = State()
        AIs[0].init_prev()
        AIs[1].init_prev()

        featuress = [[state.feature_CNN(b1, b2)] for b1, b2 in orientations]
        pis = []
        seen = [state_copy(state)]

        mover = 0
        while True:
            if mover == 0:
                # Temperatures are redrawn once per pair of moves.
                AIs[0].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
                AIs[1].tau = np.random.rand() * (1. - TAU_MIN) + TAU_MIN
                if state.turn >= 20:
                    AIs[0].tau = TAU_MIN
                    AIs[1].tau = TAU_MIN

            s, pi = AIs[mover].act_and_get_pi(state, noise=noise)
            while not state.accept_action_str(actionid2str(state, s)):
                print("this action is impossible")
                s, pi = AIs[mover].act_and_get_pi(state)
            AIs[1 - mover].prev_action = s
            pis.append(pi)

            if display:
                state.display_cui()

            if equal_draw and any(state == earlier for earlier in seen):
                break
            seen.append(state_copy(state))

            if state.terminate:
                break

            for bucket, (b1, b2) in zip(featuress, orientations):
                bucket.append(state.feature_CNN(b1, b2))
            mover = 1 - mover

        del seen

        if state.reward == 0:
            continue

        reward = state.reward
        for plain, flipped0, flipped1, flipped_both, pi in zip(*featuress, pis):
            samples.append((plain, pi, reward))
            samples.append((flipped0, _reoriented(pi, True, False), reward))
            samples.append((flipped1, _reoriented(pi, False, True), -reward))
            samples.append((flipped_both, _reoriented(pi, True, True), -reward))

    return samples