def main():
    """Generate AlphaZero self-play training data and append it to an HDF5 file.

    NOTE(review): the original file defined this exact function twice in a row;
    the duplicate definition (which silently shadowed the first) has been removed.
    """
    agents_meta = parse_schedule()

    # worker variables of the main process
    board = Board()
    sigexit = Event()
    sigexit.set()  # pre-set signal so the main-process generator iterates only once

    # subprocess data generator
    helper = DataHelper(data_files=[])
    helper.set_agents_meta(agents_meta=agents_meta)
    generator = helper.generate_batch(TRAINING_CONFIG["batch_size"])

    # start generating
    with h5py.File(f"{DATA_CONFIG['data_path']}/latest.train.hdf5", 'a') as hf:
        for state_batch, value_batch, probs_batch in generator:
            # Map dataset names to batches explicitly instead of poking at
            # locals()[name], which is fragile and defeats static analysis.
            batches = {
                "state_batch": state_batch,
                "value_batch": value_batch,
                "probs_batch": probs_batch,
            }
            for batch_name, batch in batches.items():
                if batch_name not in hf:
                    # lazily create a growable dataset shaped (0, *batch.shape)
                    shape = batch.shape
                    hf.create_dataset(batch_name, (0, *shape),
                                      maxshape=(None, *shape))
                # append the batch as one new entry along axis 0
                hf[batch_name].resize(hf[batch_name].shape[0] + 1, axis=0)
                hf[batch_name][-1] = batch
            # Prevent the main process from generating data too quickly;
            # since sigexit has been set, the proc iterates only once.
            # NOTE(review): placement inside the generator loop is inferred from
            # the original comment — confirm against the pre-mangled source.
            run_proc(helper.buffer, helper.buffer_size, helper.lock,
                     sigexit, agents_meta, board)
            board.reset()
def dual_play(agents, board=None, verbose=False, graphic=False):
    """
    Play one full game between two players.

    Params:
        agents: { Player.black: agent1, Player.white: agent2 }.
        board: initial board state; a fresh Board is created when None, and a
            finished board is reset before play begins.
        verbose: if truthy, collect and return training data instead of just
            the winner.
        graphic: if truthy, print the board after every move.
    Returns:
        if verbose:
            [(state_inputs, final_score, action_probs)], each element a
            numpy.array.
        else:
            winner
    """
    if board is None:
        board = Board()
    elif board.status["is_end"]:
        board.reset()

    # Idiom fix: the original tested `verbose is True`, which silently treated
    # truthy non-bool values (e.g. 1) as False, contradicting the docstring.
    records = [] if verbose else None

    while True:
        # agent whose turn it currently is
        cur_agent = agents[board.status["cur_player"]]
        if verbose:
            # evaluate the state to capture the policy target for training
            _, action_probs, next_move = cur_agent.eval_state(board)
            records.append([
                board.encoded_states(),
                board.status["cur_player"],
                action_probs
            ])
        else:
            next_move = cur_agent.get_action(board)
        board.apply_move(next_move)
        if graphic:
            print(board)
        if board.status["is_end"]:
            winner = board.status["winner"]
            if graphic:
                print("Game ends. winner is {}.".format(winner))
            if verbose:
                # attach the final score as seen from each recorded player
                return [(
                    state[0],
                    np.array(Player.calc_score(state[1], winner)),
                    state[2]
                ) for state in records]
            # original initialized `result = Player.none`, but the loop always
            # returns from this branch, so the winner is returned directly
            return winner
def eval_agents(agents, num_games=9):
    """
    Evaluate the relative strength of two agents over multiple games.

    The start player alternates between rounds, the shared board is reset
    after each game, and ties award half a win to each side.

    Params:
        agents: [agent1, agent2]
        num_games: number of games simulated, default to BO9.
    Returns:
        (win_rate(agent1), win_rate(agent2)) as a tuple.
    """
    print("---------Evaluating agents-------------")
    board = Board()
    players = [Player.black, Player.white]
    win_cnts = np.zeros(2)
    for i in range(num_games):
        # players[k] is always paired with agents[k], so win_cnts[k]
        # accumulates wins for agents[k] even after players is reversed
        winner = dual_play(dict(zip(players, agents)), board)
        try:
            win_idx = players.index(winner)
            win_cnts[win_idx] += 1
            print("Round {} ends, winner is <{}: {}>;".format(i + 1, winner, agents[win_idx]))
        except ValueError:
            # winner not in `players` -> tie game
            win_cnts += 0.5
            print("Round {} ends, tie game;".format(i + 1))
        players.reverse()  # exchange the start player
        board.reset()
        # Plain loop instead of the original side-effect-only list
        # comprehension (`[agent.reset() for agent in agents]`).
        for agent in agents:
            agent.reset()
    win_rates = win_cnts / num_games
    print("Win rate:")
    print("{}: {}".format(agents[0], win_rates[0]))
    print("{}: {}".format(agents[1], win_rates[1]))
    print("---------------------------------------")
    return tuple(win_rates)