def handle_batch(self, batch):
        sub_batches = dict()

        players = np.stack([task.sim.active_player for task in batch])
        sims = [task.sim for task in batch]
        boards = np.stack(convert_state_batch(sims))
        possible_actions = [task.sim.get_possible_actions() for task in batch]
        scores = np.empty((len(batch), batch[0].sim.board.size))

        # group task indices by the iteration that produced them
        for idx, task in enumerate(batch):
            iteration = task.metadata["iteration"]
            if iteration not in sub_batches:
                sub_batches[iteration] = list()

            sub_batches[iteration].append(idx)

        # score each sub-batch with the network snapshot from its iteration
        for iteration, task_indices in sub_batches.items():
            nn_agent = self.nn_agents[iteration]
            indices = np.asarray(task_indices)

            batch_players = players[indices]
            batch_boards = boards[indices]
            scores[indices, ...] = nn_agent.get_scores(batch_boards,
                                                       batch_players)

        return scores
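The method above groups task indices by iteration, evaluates each group with the matching network, and scatters the results back into a preallocated array. A minimal, self-contained sketch of that gather/scatter pattern, with a stand-in expression in place of nn_agents[key].get_scores (which is not defined here):

import numpy as np
from collections import defaultdict

keys = np.array([0, 1, 0, 1, 1])            # e.g. metadata["iteration"] per task
features = np.arange(10.0).reshape(5, 2)    # e.g. per-task inputs
scores = np.empty_like(features)

groups = defaultdict(list)
for idx, key in enumerate(keys):
    groups[key].append(idx)

for key, idxs in groups.items():
    idxs = np.asarray(idxs)
    # stand-in for self.nn_agents[key].get_scores(...)
    scores[idxs, ...] = features[idxs] * (key + 1)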
Example #2
    def handle_batch(self, batch):
        batch_size = len(batch)
        chunksize = int(self.config["GLOBAL"]["chunksize"])
        depth = int(self.config["NMCTSAgent"]["search_depth"])

        sims = list()
        active_players = list()
        for task in batch:
            board_position, active_player = task.metadata["sample"]
            sim = HexGame(active_player, board_position, active_player)
            active_players.append(active_player)
            sims.append(sim)
        converted_boards = convert_state_batch(sims)
        policies = self.nn_agent.get_scores(converted_boards,
                                            np.asarray(active_players))

        # attach a per-task MCTS agent seeded with its slice of the batched policy
        for idx, task in enumerate(batch):
            agent = NMCTSAgent(depth=depth,
                               env=sims[idx],
                               network_policy=policies[idx])

            task.metadata.update({"sim": sims[idx], "agent": agent})

            task.gen = agent.deferred_plan()

        return [None] * len(batch)
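task.gen is set to agent.deferred_plan(), which suggests a generator-driven planning protocol: a driver resumes each task's generator and feeds results back in. The real NMCTSAgent interface is not shown here; the following is only a hedged, standalone sketch of that generator pattern with a made-up request format:

import numpy as np

def deferred_plan(policy):
    # yield a (hypothetical) evaluation request; the driver resumes the
    # generator with the result via send()
    value = yield ("evaluate", int(np.argmax(policy)))
    return value

gen = deferred_plan(np.array([0.1, 0.7, 0.2]))
request = next(gen)              # ('evaluate', 1)
try:
    gen.send(0.42)               # driver supplies the evaluation result
except StopIteration as done:
    print(request, done.value)   # ('evaluate', 1) 0.42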
Example #3
    def handle_batch(self, batch):
        players = np.stack([task.sim.active_player for task in batch])
        sims = [task.sim for task in batch]
        boards = np.stack(convert_state_batch(sims))
        possible_actions = [task.sim.get_possible_actions() for task in batch]
        scores = self.nn_agent.get_scores(boards, players)

        actions = list()
        for score, possible in zip(scores, possible_actions):
            action_idx = np.argmax(score[possible])
            actions.append(possible[action_idx])
        return actions
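The action selection above uses a masked-argmax idiom: take the argmax over the scores of the legal actions only, then map the winning position back to the original action index. A standalone illustration:

import numpy as np

score = np.array([0.1, 0.9, 0.3, 0.7])
possible = np.array([0, 2, 3])            # legal action indices; index 1 is illegal
action_idx = np.argmax(score[possible])   # argmax over legal scores only -> 2
action = possible[action_idx]             # map back to the board index -> 3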
def build_apprentice(samples, labels, config, workers):
    board_size = int(config["GLOBAL"]["board_size"])
    chunksize = int(config["GLOBAL"]["chunksize"])

    boards, players = samples
    sim_args = zip(players, boards, players)
    if workers:
        data = list(workers.starmap(HexGame, sim_args, chunksize=chunksize))
    else:
        # unpack each (player, board, player) tuple into separate HexGame arguments
        data = [HexGame(*args) for args in sim_args]
    data = np.stack(convert_state_batch(data))

    labels = tf.one_hot(labels, board_size ** 2)
    labels = (labels[:, 0, :], labels[:, 1, :])

    tf.keras.backend.clear_session()
    network = train_network(data, labels, config)
    return NNAgent(network)
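build_apprentice one-hot encodes a pair of move indices per sample and splits them into two label tensors, matching a two-output network. What each of the two columns represents is not documented here; the shapes work out as in this small sketch:

import tensorflow as tf

board_size = 3
labels = tf.constant([[4, 8], [0, 2]])         # (N, 2) move indices per sample
one_hot = tf.one_hot(labels, board_size ** 2)  # (N, 2, board_size**2)
head_a, head_b = one_hot[:, 0, :], one_hot[:, 1, :]
print(head_a.shape, head_b.shape)              # (2, 9) (2, 9)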
def load_data(data_file, label_file, config, workers=None, max_size=None):
    data_npz = np.load(data_file)
    boards = data_npz['arr_0']
    players = data_npz['arr_1']
    labels = np.load(label_file)['arr_0']

    if workers:
        chunksize = int(config["GLOBAL"]["chunksize"])
        sim_args = zip(players, boards, players)
        data = [
            sim
            for sim in workers.starmap(HexGame, sim_args, chunksize=chunksize)
        ]
    else:
        data = [sim for sim in map(HexGame, players, boards, players)]
    data = np.stack(convert_state_batch(data))

    board_size = int(config["GLOBAL"]["board_size"])
    labels = tf.one_hot(labels, board_size**2)

    if max_size is None:
        return data, (labels[:, 0, :], labels[:, 1, :])
    else:
        return (data[:max_size, ...],
                (labels[:max_size, 0, :], labels[:max_size, 1, :]))
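load_data reads its arrays by the default np.savez keys, so compatible input files can be produced as below. The array shapes and dtypes are assumptions inferred from how the arrays are used above, not taken from the original project:

import numpy as np

boards = np.zeros((4, 5, 5), dtype=np.int8)    # assumed board encoding
players = np.ones(4, dtype=np.int8)            # assumed active-player ids
labels = np.zeros((4, 2), dtype=np.int64)      # assumed (N, 2) move indices

np.savez("data.npz", boards, players)          # stored as 'arr_0', 'arr_1'
np.savez("labels.npz", labels)                 # stored as 'arr_0'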
Example #6
    def handle_batch(self, batch):
        players = np.stack([task.sim.active_player for task in batch])
        sims = [task.sim for task in batch]
        boards = np.stack(convert_state_batch(sims))
        scores = self.nn_agent.get_scores(boards, players)
        return scores