def action(self, board):
    env = GameEnv().update(board)
    key = self.counter_key(env)

    for tl in range(self.play_config.thinking_loop):
        self.search_moves(board)
        policy = self.calc_policy(board)
        # sample a move from the search policy
        action = int(np.random.choice(range(self.labels_n), p=policy))
        # greedy move: best Q among moves that were actually visited
        action_by_value = int(
            np.argmax(self.var_q[key] + (self.var_n[key] > 0) * 100))
        # accept the sampled move once it agrees with the greedy one,
        # or unconditionally during the exploratory opening turns
        if action == action_by_value or env.turn < self.play_config.change_tau_turn:
            break

    # this is for play_gui, not necessary when training.
    self.thinking_history[env.observation] = HistoryItem(
        action, policy, list(self.var_q[key]), list(self.var_n[key]))
    self.moves.append([env.observation, list(policy)])
    return action
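# A small standalone illustration of the action_by_value expression above,
# with made-up Q values and visit counts (not the real tree state): adding
# 100 to every visited move's Q guarantees an unvisited move (N == 0) can
# never win the argmax.
import numpy as np

q = np.array([0.9, -0.2, 0.3])  # mean action values; index 0 was never visited
n = np.array([0,    40,   10])  # visit counts

greedy = int(np.argmax(q + (n > 0) * 100))
print(greedy)  # 2 -- the best Q among visited moves, not the unvisited q=0.9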
def play_game(self, best_model, ng_model):
    env = GameEnv().reset()

    best_player = GamePlayer(self.config, best_model,
                             play_config=self.config.eval.play_config)
    ng_player = GamePlayer(self.config, ng_model,
                           play_config=self.config.eval.play_config)

    # assign colors at random so neither model always moves first
    best_is_white = random() < 0.5
    if not best_is_white:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player

    while not env.done:
        if env.player_turn() == Player.black:
            action = black.action(env.board)
        else:
            action = white.action(env.board)
        env.step(action)

    # ng_win: 1 if the next-generation model won, 0 if it lost, None on a draw
    ng_win = None
    if env.winner == Winner.white:
        ng_win = 0 if best_is_white else 1
    elif env.winner == Winner.black:
        ng_win = 1 if best_is_white else 0
    return ng_win, best_is_white
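# A minimal sketch of how play_game could be driven to estimate the
# challenger's win rate; evaluate_model and its game_num parameter are
# hypothetical names for illustration, not part of this module.
def evaluate_model(worker, best_model, ng_model, game_num=50):
    results = []
    for _ in range(game_num):
        ng_win, best_is_white = worker.play_game(best_model, ng_model)
        if ng_win is not None:  # draws carry no signal for the win rate
            results.append(ng_win)
    return sum(results) / len(results) if results else 0.0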
def start(config: Config):
    PlayWithHumanConfig().update_play_config(config.play)
    game_model = PlayWithHuman(config)

    while True:
        env = GameEnv().reset()
        human_is_black = random() < 0.5
        game_model.start_game(human_is_black)

        while not env.done:
            # the AI moves whenever the side to play is not the human's color
            ai_turn = (env.player_turn() == Player.black) != human_is_black
            if ai_turn:
                action = game_model.move_by_ai(env)
                print("AI moves to: " + str(action))
            else:
                action = game_model.move_by_human(env)
                print("You move to: " + str(action))
            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            print("X wins")
        elif env.winner == Winner.black:
            print("O wins")
        else:
            print("Game was a draw")
async def start_search_my_move(self, board):
    self.running_simulation_num += 1
    async with self.sem:  # cap the number of searches running in parallel
        env = GameEnv().update(board)
        leaf_v = await self.search_my_move(env, is_root_node=True)
        self.running_simulation_num -= 1
        return leaf_v
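# A minimal sketch of the driver side, assuming self.sem is an
# asyncio.Semaphore and self.play_config.simulation_num_per_move exists;
# this is illustrative, not the module's actual search_moves implementation.
import asyncio

def search_moves(self, board):
    loop = asyncio.get_event_loop()
    coroutines = [self.start_search_my_move(board)
                  for _ in range(self.play_config.simulation_num_per_move)]
    # the semaphore inside start_search_my_move throttles these
    return loop.run_until_complete(asyncio.gather(*coroutines))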
def calc_policy(self, board):
    """Calculate the search policy π(a|s0) from the root visit counts."""
    pc = self.play_config
    env = GameEnv().update(board)
    key = self.counter_key(env)
    if env.turn < pc.change_tau_turn:
        # tau = 1: sample moves in proportion to their visit counts
        return self.var_n[key] / np.sum(self.var_n[key])
    else:
        # tau = 0: deterministically play the most-visited move
        action = np.argmax(self.var_n[key])
        ret = np.zeros(self.labels_n)
        ret[action] = 1
        return ret
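# A standalone sketch of the two temperature regimes above, using made-up
# visit counts for the nine tic-tac-toe squares (not real search output).
import numpy as np

visit_counts = np.array([0, 12, 3, 0, 25, 5, 0, 0, 5], dtype=float)

pi_tau1 = visit_counts / np.sum(visit_counts)  # tau = 1: proportional sampling
pi_tau0 = np.zeros(len(visit_counts))          # tau = 0: one-hot greedy policy
pi_tau0[np.argmax(visit_counts)] = 1

print(pi_tau1)  # [0.   0.24 0.06 0.   0.5  0.1  0.   0.   0.1 ]
print(pi_tau0)  # [0. 0. 0. 0. 1. 0. 0. 0. 0.]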
def __init__(self, config: Config):
    self.config = config
    self.model = None  # type: GameModel
    self.loaded_filenames = set()
    self.loaded_data = {}
    self.dataset = None
    self.optimizer = None
    self.dbx = None
    self.version = 0  # TODO: change to a dynamic lookup from the Dropbox files
    self.env = GameEnv()
    self.raw_timestamp = None
    self.best_is_white = True
    self.play_files_per_generation = 15  # each file holds 25 games, so each generation adds 375 games
    self.max_play_files = 300  # so the learner always sees the 7,500 games from the previous 20 generations
    self.min_play_files_to_learn = 0
    self.play_files_on_dropbox = 0
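# The retention numbers above, spelled out as arithmetic:
games_per_file = 25
files_per_generation = 15   # self.play_files_per_generation
max_files = 300             # self.max_play_files

games_per_generation = games_per_file * files_per_generation  # 375
generations_in_window = max_files // files_per_generation     # 20
games_in_window = games_per_file * max_files                  # 7500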
def convert_to_training_data(data):
    state_list = []
    policy_list = []
    z_list = []
    for state, policy, z in data:
        board = np.reshape(list(state), (3, 3))
        env = GameEnv().update(board)

        black_ary, white_ary = env.black_and_white_plane()
        # put the plane of the player to move first
        state = [black_ary, white_ary] if env.player_turn() == Player.black \
            else [white_ary, black_ary]

        state_list.append(state)
        policy_list.append(policy)
        z_list.append(z)

    return np.array(state_list), np.array(policy_list), np.array(z_list)
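# A minimal usage sketch with one hand-written (state, policy, z) record.
# The board characters and exact plane shapes depend on GameEnv's encoding,
# so this record and the printed shapes are assumptions for illustration.
example_data = [
    ('O   X    ',                                     # flattened 3x3 board
     [0.0, 0.2, 0.1, 0.2, 0.0, 0.1, 0.2, 0.1, 0.1],  # search policy over 9 moves
     1),                                              # game outcome z
]
states, policies, zs = convert_to_training_data(example_data)
print(states.shape, policies.shape, zs.shape)  # e.g. (1, 2, 3, 3) (1, 9) (1,)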
def play_game(self, best_model, ng_model):
    env = GameEnv().reset()

    # abort the match if a newer best model was uploaded in the meantime
    if (self.raw_timestamp != self.dbx.files_get_metadata(
            '/model/model_best_weight.h5').client_modified):
        print('A newer model version is available - giving up this match')
        ng_win = 0
        self.best_is_white = True
        return ng_win, self.best_is_white

    best_player = GamePlayer(self.config, best_model,
                             play_config=self.config.eval.play_config)
    ng_player = GamePlayer(self.config, ng_model,
                           play_config=self.config.eval.play_config)

    # alternate colors between games so neither model always moves first
    self.best_is_white = not self.best_is_white
    if not self.best_is_white:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player

    while not env.done:
        if env.player_turn() == Player.black:
            action = black.action(env.board)
        else:
            action = white.action(env.board)
        env.step(action)

    # ng_win: 1 if the next-generation model won, 0 if it lost, None on a draw
    ng_win = None
    if env.winner == Winner.white:
        ng_win = 0 if self.best_is_white else 1
    elif env.winner == Winner.black:
        ng_win = 1 if self.best_is_white else 0
    return ng_win, self.best_is_white
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.2)
    return SelfPlayWorker(config, env=GameEnv()).start()