def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    gomoku_model = PlayWithHuman(config)

    while True:
        env = GomokuEnv().reset()
        human_is_black = random() < 0.5
        gomoku_model.start_game(human_is_black)

        while not env.done:
            # The AI plays whichever color the human did not draw.
            if env.player_turn() == Player.black:
                if not human_is_black:
                    action = gomoku_model.move_by_ai(env)
                    print("AI moves to: " + str(action))
                else:
                    action = gomoku_model.move_by_human(env)
                    print("You move to: " + str(action))
            else:
                if human_is_black:
                    action = gomoku_model.move_by_ai(env)
                    print("AI moves to: " + str(action))
                else:
                    action = gomoku_model.move_by_human(env)
                    print("You move to: " + str(action))
            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            print("X wins")
        elif env.winner == Winner.black:
            print("O wins")
        else:
            print("Game was a draw")
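# move_by_human is not shown above. The helper below is a hedged, self-contained
# sketch of the coordinate parsing such a method typically needs; it is NOT the
# repository's actual code, and the 8x5 board shape is an assumption taken from
# the reshape in convert_to_training_data further down.
def parse_human_move(text: str, height: int = 8, width: int = 5) -> int:
    """Convert a 'row col' string into a flat action index; raise ValueError on bad input."""
    row, col = (int(tok) for tok in text.split())
    if not (0 <= row < height and 0 <= col < width):
        raise ValueError(f"move ({row}, {col}) is off the {height}x{width} board")
    return row * width + col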
async def search_my_move(self, env: GomokuEnv, is_root_node=False):
    """
    Q and V are values for this player (always white).
    P is the prior for the next player to move (black or white).
    :param env: current game environment
    :param is_root_node: whether this call is the root of the search
    :return: leaf value from white's perspective
    """
    if env.done:
        if env.winner == Winner.white:
            return 1
        elif env.winner == Winner.black:
            return -1
        else:
            return 0

    key = self.counter_key(env)

    # Wait while another coroutine is expanding this node.
    while key in self.now_expanding:
        await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

    # Is this a leaf?
    if key not in self.expanded:  # reached a leaf node
        leaf_v = await self.expand_and_evaluate(env)
        if env.player_turn() == Player.white:
            return leaf_v   # value for white
        else:
            return -leaf_v  # value for white == -value for black

    action_t = self.select_action_q_and_u(env, is_root_node)
    _, _ = env.step(action_t)

    # Apply a virtual loss so parallel searches avoid descending the same path.
    virtual_loss = self.config.play.virtual_loss
    self.var_n[key][action_t] += virtual_loss
    self.var_w[key][action_t] -= virtual_loss

    leaf_v = await self.search_my_move(env)  # next move

    # On the way back up the search path, update N, W, Q (and the AMAF memory).
    if self.mem is not None:
        self.mem.update(key, action_t, leaf_v)
    n = self.var_n[key][action_t] = self.var_n[key][action_t] - virtual_loss + 1
    w = self.var_w[key][action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
    q = w / n
    if self.mem is not None:
        # Blend the tree value with the AMAF value (RAVE-style mixing).
        q = (1.0 - self.beta) * w / n + self.beta * self.mem.get_amaf_q(key, action_t)
    self.var_q[key][action_t] = q
    return leaf_v
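# select_action_q_and_u is not shown above. Below is a minimal, self-contained
# sketch of the standard AlphaZero PUCT rule it presumably implements:
# argmax_a [ Q(s,a) + c_puct * P(s,a) * sqrt(sum_b N(s,b)) / (1 + N(s,a)) ].
# var_q/var_n/var_p and c_puct here are illustrative stand-ins for the per-node
# statistics, not the repository's exact attributes.
import numpy as np

def select_action_puct(var_q, var_n, var_p, c_puct=1.5):
    """Pick the action maximizing Q plus the exploration bonus U."""
    sqrt_total = np.sqrt(max(1, var_n.sum()))
    u = c_puct * var_p * sqrt_total / (1.0 + var_n)
    return int(np.argmax(var_q + u))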
import numpy as np


def convert_to_training_data(data):
    """
    Helper function to convert saved self-play data into the training data format.
    :param data: format is SelfPlayWorker.buffer
    :return: (state, policy, z) arrays ready for training
    """
    state_list = []
    policy_list = []
    z_list = []
    for state, policy, z in data:
        board = list(state)
        board = np.reshape(board, (8, 5))
        env = GomokuEnv().update(board, 0)

        black_ary, white_ary = env.black_and_white_plane()
        # Order the planes so the current player's stones come first.
        state = [black_ary, white_ary] if env.player_turn() == Player.black else [white_ary, black_ary]

        state_list.append(state)
        policy_list.append(policy)
        z_list.append(z)

    return np.array(state_list), np.array(policy_list), np.array(z_list)
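# A hedged usage example for convert_to_training_data. It assumes the buffer
# layout (state, policy, z) described in the docstring, a flattened 8x5 board,
# and a policy over the 40 cells; it also requires the repository's GomokuEnv
# to be importable, so the exact shapes printed are illustrative.
buffer = [
    ([0] * 40,          # flattened 8x5 board state (all empty)
     [1.0 / 40] * 40,   # uniform visit-count policy over the 40 cells
     1),                # game outcome z from the mover's perspective
]
states, policies, zs = convert_to_training_data(buffer)
print(states.shape, policies.shape, zs.shape)  # e.g. (1, 2, 8, 5) (1, 40) (1,)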
def play_game(self, best_model, ng_model):
    """Plays a single game between the best model and the candidate model."""
    env = GomokuEnv().reset()

    best_player = GomokuPlayer(self.config, best_model,
                               play_config=self.config.eval.play_config)
    ng_player = GomokuPlayer(self.config, ng_model,
                             play_config=self.config.eval.play_config)

    # Randomize colors so neither model always moves first.
    best_is_white = random() < 0.5
    if not best_is_white:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player

    while not env.done:
        if env.player_turn() == Player.black:
            action = black.action(env.board, env.turn)
        else:
            action = white.action(env.board, env.turn)
        env.step(action)

    # Record the winner: 1 if the candidate (next-generation) model won,
    # 0 if it lost, None on a draw.
    ng_win = None
    if env.winner == Winner.white:
        ng_win = 0 if best_is_white else 1
    elif env.winner == Winner.black:
        ng_win = 1 if best_is_white else 0
    return ng_win, best_is_white
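# play_game returns a single result; an evaluator typically aggregates many
# games and promotes the candidate past a win-rate threshold. A sketch under
# assumed config fields game_num and replace_rate on self.config.eval (the
# names are illustrative, not confirmed by the source).
def evaluate_model(self, best_model, ng_model):
    """Play a series of games and return True if the candidate should replace the best model."""
    results = []
    for game_idx in range(self.config.eval.game_num):
        ng_win, best_is_white = self.play_game(best_model, ng_model)
        if ng_win is not None:  # draws are excluded from the win rate
            results.append(ng_win)
        win_rate = sum(results) / len(results) if results else 0.0
        print(f"game {game_idx}: ng_win={ng_win}, running win rate {win_rate:.3f}")
    # Promote the candidate only if it clearly beats the current best.
    return bool(results) and sum(results) / len(results) >= self.config.eval.replace_rate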