    def __init__(self, env_fns, start_method=None):
        self.waiting = False
        self.closed = False
        n_envs = len(env_fns)

        if start_method is None:
            # Fork is not a thread-safe start method (see issue #217),
            # but it is more user friendly (it does not require wrapping the
            # code in an `if __name__ == "__main__":` guard); a short usage
            # sketch follows this constructor.
            forkserver_available = "forkserver" in multiprocessing.get_all_start_methods()
            start_method = "forkserver" if forkserver_available else "spawn"
        ctx = multiprocessing.get_context(start_method)

        self.remotes, self.work_remotes = zip(
            *[ctx.Pipe() for _ in range(n_envs)])
        self.processes = []
        for work_remote, remote, env_fn in zip(self.work_remotes, self.remotes,
                                               env_fns):
            args = (work_remote, remote, CloudpickleWrapper(env_fn))
            # daemon=True: if the main process crashes, we should not cause things to hang
            process = ctx.Process(target=_worker, args=args, daemon=True)  # pytype:disable=attribute-error
            process.start()
            self.processes.append(process)
            work_remote.close()

        self.remotes[0].send(("get_spaces", None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
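# --- Usage sketch (illustration only; the class name and gym usage are assumptions,
# the snippet above only shows the constructor). With the "forkserver"/"spawn" start
# methods chosen by default, the vectorized env must be built under an
# `if __name__ == "__main__":` guard so that worker processes can safely re-import
# the module.
import gym


def make_env():
    # Each entry of `env_fns` is a zero-argument callable building one environment.
    return gym.make("CartPole-v1")


if __name__ == "__main__":
    vec_env = SubprocVecEnv([make_env for _ in range(4)])  # hypothetical class name
    obs = vec_env.reset()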
    def __init__(
        self,
        env_fns: List[Tuple[Callable[[], Union[str, gym.Env]],
                            Callable[[torch.Tensor], Union[torch.Tensor, np.ndarray, list]]]],
    ):
        """ Constructor

        Args:
            env_fns: a list of pairs of functions; the first creates one environment and the second maps
                the action output of the model to something the env object can accept (a short usage
                sketch of such a pair follows this constructor). See
                VectorizedMultiAgentEnvWrapper.MultiAgentEnvWrapper for details.
        """
        self.envs = [
            self.MultiAgentEnvWrapper(fn_pair[0](), fn_pair[1])
            for fn_pair in env_fns
        ]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)
        obs_space = env.observation_space
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        self.buf_obs = OrderedDict([(k,
                                     np.zeros(
                                         (self.num_envs, ) + tuple(shapes[k]),
                                         dtype=dtypes[k])) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs, 1), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, 1, env.agent_num),
                                 dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.metadata = env.metadata
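# --- Illustration (hypothetical names, not from the source): one `env_fns` entry for
# the multi-agent constructor above is a (factory, action mapper) pair. The mapper
# takes the model's torch.Tensor output and converts it into something the wrapped
# env accepts, e.g. a plain numpy array.
import numpy as np
import torch


def make_env_fn_pair(env_ctor):
    # `env_ctor` stands in for any multi-agent environment constructor.
    def env_factory():
        return env_ctor()

    def action_mapper(actions: torch.Tensor) -> np.ndarray:
        # Detach from the graph, move to CPU, and hand plain numpy to the env.
        return actions.detach().cpu().numpy()

    return env_factory, action_mapper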
    def __init__(self, env_fns: List[Callable[[], gym.Env]]):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
        obs_space = env.observation_space
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        self.buf_obs = OrderedDict([(k, np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])) for k in self.keys])
        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.metadata = env.metadata
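# --- Note (assumed contract, not verified against the source): `obs_space_info`
# flattens an observation space into parallel key/shape/dtype lookups, so that the
# constructor above can allocate one (num_envs, *shape) buffer per key, e.g.:
#
#     space = gym.spaces.Dict({
#         "position": gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
#         "goal": gym.spaces.Discrete(4),
#     })
#     keys, shapes, dtypes = obs_space_info(space)
#     # keys roughly ["position", "goal"]; shapes/dtypes are dicts keyed by those names;
#     # a non-Dict space collapses to a single anonymous key.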
    def __init__(self, env_fns):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        boxes = {}
        # Make observation space 1 x d, representing a single node
        # (a sketch of such a helper follows this constructor).
        action_space = graph_action_space()
        observation_space = graph_obs_space(env.observation_space)
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.metadata = env.metadata
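# --- Sketch (assumption; the project's real helpers are not shown here): given the
# comment in the constructor above, `graph_obs_space` presumably re-exposes the env's
# flat observation space as a 1 x d Box so each observation is a single graph node.
# One way such a helper could look:
import numpy as np
from gym import spaces


def graph_obs_space_sketch(obs_space: spaces.Box) -> spaces.Box:
    # Flatten whatever shape the original space reports into a single row vector.
    d = int(np.prod(obs_space.shape))
    return spaces.Box(low=-np.inf, high=np.inf, shape=(1, d), dtype=np.float32)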