def handle_batch(self, batch):
    """Score every task in ``batch`` with the network of its own iteration.

    Tasks are grouped by ``task.metadata["iteration"]``. Each group is
    evaluated with a single call to ``self.nn_agents[iteration].get_scores``
    and the result is written into the rows belonging to that group, so the
    returned array keeps the order of ``batch``.

    Args:
        batch: Non-empty sequence of tasks. Each task provides ``sim`` (with
            ``active_player`` and ``board.size``) and a ``metadata`` mapping
            that contains the key ``"iteration"``.

    Returns:
        Array with ``len(batch)`` rows and ``batch[0].sim.board.size``
        columns holding the scores returned by the matching agent.
    """
    sims = [task.sim for task in batch]
    players = np.stack([sim.active_player for sim in sims])
    boards = np.stack(convert_state_batch(sims))

    # Every row is overwritten below because each task lands in exactly one
    # iteration group, so an uninitialised buffer is sufficient.
    scores = np.empty((len(batch), batch[0].sim.board.size))

    # Map iteration -> row indices of the tasks that belong to it.
    rows_by_iteration = {}
    for row, task in enumerate(batch):
        rows_by_iteration.setdefault(task.metadata["iteration"], []).append(row)

    for iteration, rows in rows_by_iteration.items():
        indices = np.asarray(rows)
        nn_agent = self.nn_agents[iteration]
        scores[indices, ...] = nn_agent.get_scores(boards[indices], players[indices])

    return scores
def handle_batch(self, batch):
    """Attach a simulator, an NMCTS agent and its planning generator to each task.

    For every task a ``HexGame`` is built from ``task.metadata["sample"]``
    (a ``(board_position, active_player)`` pair). All games are converted and
    scored in one call to ``self.nn_agent.get_scores``; the score row of each
    game is handed to its ``NMCTSAgent`` as ``network_policy``.

    Side effects on each task:
        * ``task.metadata`` gains the keys ``"sim"`` and ``"agent"``.
        * ``task.gen`` is set to ``agent.deferred_plan()``.

    Args:
        batch: Sequence of tasks whose ``metadata`` contains ``"sample"``.

    Returns:
        A list of ``None`` with one entry per task.
    """
    depth = int(self.config["NMCTSAgent"]["search_depth"])

    sims = []
    active_players = []
    for task in batch:
        board_position, active_player = task.metadata["sample"]
        sims.append(HexGame(active_player, board_position, active_player))
        active_players.append(active_player)

    converted_boards = convert_state_batch(sims)
    policies = self.nn_agent.get_scores(converted_boards,
                                        np.asarray(active_players))

    for idx, (task, sim) in enumerate(zip(batch, sims)):
        agent = NMCTSAgent(depth=depth, env=sim, network_policy=policies[idx])
        task.metadata.update({"sim": sim, "agent": agent})
        task.gen = agent.deferred_plan()

    return [None] * len(batch)
def handle_batch(self, batch):
    """Choose, for each task, the possible action with the highest score.

    The scores come from ``self.nn_agent.get_scores`` evaluated on the whole
    batch at once; only the entries listed by the task's
    ``sim.get_possible_actions()`` are considered when taking the maximum.

    Args:
        batch: Sequence of tasks, each exposing a ``sim`` attribute.

    Returns:
        A list with one selected action per task, in batch order.
    """
    active = np.stack([task.sim.active_player for task in batch])
    games = [task.sim for task in batch]
    encoded = np.stack(convert_state_batch(games))
    candidates = [game.get_possible_actions() for game in games]
    ratings = self.nn_agent.get_scores(encoded, active)

    def best_of(rating, options):
        # argmax runs over the restricted scores, so map it back to an action.
        return options[np.argmax(rating[options])]

    return [best_of(rating, options)
            for rating, options in zip(ratings, candidates)]
def build_apprentice(samples, labels, config, workers):
    """Train a new network on ``samples``/``labels`` and wrap it in an ``NNAgent``.

    Args:
        samples: Pair ``(boards, players)``. One ``HexGame`` is created per
            position as ``HexGame(player, board, player)``.
        labels: Integer labels that are one-hot encoded to depth
            ``board_size ** 2`` and then split along axis 1 into two targets
            (index 0 and index 1).
        config: Mapping providing ``config["GLOBAL"]["board_size"]`` and,
            when ``workers`` is used, ``config["GLOBAL"]["chunksize"]``.
        workers: Optional pool offering ``starmap``. When falsy the games are
            built serially in this process.

    Returns:
        An ``NNAgent`` wrapping the network returned by ``train_network``.
    """
    board_size = int(config["GLOBAL"]["board_size"])
    boards, players = samples

    if workers:
        chunksize = int(config["GLOBAL"]["chunksize"])
        sims = list(workers.starmap(
            HexGame, zip(players, boards, players), chunksize=chunksize))
    else:
        # Pass the three iterables to map() separately so HexGame receives
        # three positional arguments, matching what starmap does above.
        # Mapping over the zipped tuples would call HexGame with one tuple.
        sims = list(map(HexGame, players, boards, players))

    data = np.stack(convert_state_batch(sims))

    labels = tf.one_hot(labels, board_size ** 2)
    labels = (labels[:, 0, :], labels[:, 1, :])

    # Reset Keras' global state before a fresh network is trained.
    tf.keras.backend.clear_session()
    network = train_network(data, labels, config)
    return NNAgent(network)
def load_data(data_file, label_file, config, workers=None, max_size=None):
    """Load stored positions and labels and convert them into training arrays.

    Args:
        data_file: Archive readable by ``np.load`` holding the boards under
            ``'arr_0'`` and the active players under ``'arr_1'``.
        label_file: Archive readable by ``np.load`` holding the labels under
            ``'arr_0'``.
        config: Mapping providing ``config["GLOBAL"]["board_size"]`` and,
            when ``workers`` is used, ``config["GLOBAL"]["chunksize"]``.
        workers: Optional pool offering ``starmap``; when falsy the
            ``HexGame`` objects are built serially.
        max_size: If not ``None``, only the first ``max_size`` samples are
            returned.

    Returns:
        ``(data, (labels_0, labels_1))`` where ``data`` is the stacked output
        of ``convert_state_batch`` and the two label tensors are the one-hot
        encoded labels at index 0 and 1 of axis 1.
    """
    # Open each archive once and close it deterministically; the arrays are
    # read into memory when indexed, so they stay valid after the block.
    with np.load(data_file) as archive:
        boards = archive['arr_0']
        players = archive['arr_1']
    with np.load(label_file) as archive:
        labels = archive['arr_0']

    if workers:
        chunksize = int(config["GLOBAL"]["chunksize"])
        sims = list(workers.starmap(
            HexGame, zip(players, boards, players), chunksize=chunksize))
    else:
        sims = list(map(HexGame, players, boards, players))

    data = np.stack(convert_state_batch(sims))

    board_size = int(config["GLOBAL"]["board_size"])
    labels = tf.one_hot(labels, board_size**2)

    if max_size is None:
        return data, (labels[:, 0, :], labels[:, 1, :])
    return (data[:max_size, ...],
            (labels[:max_size, 0, :], labels[:max_size, 1, :]))
def handle_batch(self, batch):
    """Return the scores of ``self.nn_agent`` for every task in ``batch``.

    Args:
        batch: Sequence of tasks, each exposing ``sim`` with an
            ``active_player`` attribute.

    Returns:
        Whatever ``self.nn_agent.get_scores`` yields for the stacked,
        converted simulator states and their active players.
    """
    active = np.stack([task.sim.active_player for task in batch])
    games = [task.sim for task in batch]
    encoded = np.stack(convert_state_batch(games))
    return self.nn_agent.get_scores(encoded, active)