예제 #1
0
    def __init__(self, world, agents, test_each=0, max_batch=16):
        """Record trajectories produced in `world`, then replay them.

        Construction happens in two phases:

        1. While the agents are being run, `self.trajectories(agent, n)`
           forwards to `world.trajectories(agent, n)` and stores every
           returned trajectory in an internal cache.
        2. Once all agents have been run, `self.trajectories(_, n)` is
           rebound to a sampler that ignores its first argument and returns
           `n` cached trajectories drawn uniformly at random, with
           replacement.

        Args:
            world: Object exposing `obs_shape`, `act_shape`, `rew_shape`
                and `trajectories(agent, n)`.
            agents: A single agent specification or a list of them. Each
                one is passed to `Agent.build(spec, self, index)`.
            test_each: Number of trajectories requested from `world` for
                every agent. With the default of 0 nothing is requested by
                this constructor itself.
            max_batch: Upper bound on the `n` of a single
                `world.trajectories` call made by this constructor.

        Raises:
            ValueError: If `max_batch` is below 1 while trajectories have
                to be collected (the batching loop could never finish).
        """
        import numpy as np

        if not isinstance(agents, list):
            agents = [agents]

        # Shape getters simply delegate to the wrapped world.
        self.get_observation_shape = lambda: world.obs_shape
        self.get_action_shape = lambda: world.act_shape
        self.get_reward_shape = lambda: world.rew_shape

        cache = []

        def record_trajectories(agent, n):
            """Fetch `n` trajectories from the world and remember them."""
            trajs = world.trajectories(agent, n)
            cache.extend(trajs)
            return trajs

        # Phase 1: anything that asks this object for trajectories while the
        # agents are being built/run below goes through the recorder.
        self.trajectories = record_trajectories

        def test(agent):
            """Run `agent` for `test_each` trajectories, in batches."""
            mandalka.evaluate(agent)
            todo = test_each
            if todo >= 1 and max_batch < 1:
                # A batch size below 1 never reduces `todo` by a whole
                # trajectory, so the loop below would not terminate.
                raise ValueError(
                    "max_batch must be >= 1 when test_each >= 1, got %r"
                    % (max_batch,)
                )
            while todo >= 1:
                batch = min(max_batch, todo)
                record_trajectories(agent, n=batch)
                todo -= batch

        for i, a in enumerate(agents):
            test(Agent.build(a, self, i))

        # NOTE: unseeded, so the sampled trajectories differ between runs.
        rng = np.random.RandomState()

        def sample_trajectories(_, n):
            """Return `n` cached trajectories sampled with replacement.

            The first argument is accepted only for signature compatibility
            with the recording phase and is ignored.
            """
            n = int(n)
            assert n >= 1
            if not cache:
                # rng.choice(0, ...) would fail with an opaque NumPy message;
                # keep the exception type but explain the actual cause.
                raise ValueError(
                    "no cached trajectories to sample from "
                    "(nothing was recorded during construction)"
                )
            idx = rng.choice(len(cache), size=n)
            # TODO: this is unsafe, values could be modified outside
            return [cache[i] for i in idx]

        # Phase 2: from now on only replay what has been recorded.
        self.trajectories = sample_trajectories
        self.num_trajectories = lambda: len(cache)