def __init__(self, env_fns, spaces=None):
    """Spawn one worker subprocess per env and query shared spaces.

    env_fns: list of callables building gym environments to run in
    subprocesses. `spaces` is accepted for interface compatibility but
    unused here.
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    # One duplex pipe per env: parent keeps `remotes`, workers get
    # `work_remotes`.
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [
        Process(target=worker,
                args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    # Close the parent's copies of the worker-side pipe ends so EOF
    # propagates if a worker dies.
    for remote in self.work_remotes:
        remote.close()
    # Ask the first worker for the (shared) spaces and agent types.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    self.remotes[0].send(('get_agent_types', None))
    self.agent_types = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def __init__(self, num_env, env_name, max_episode_steps, start_index,
             use_visual, spaces=None):
    """Spawn `num_env` worker subprocesses for the named environment.

    Arguments:
    num_env: number of worker processes to spawn.
    env_name: name forwarded to each worker's env constructor.
    max_episode_steps: per-episode step cap forwarded to workers.
    start_index: base rank; worker i receives rank start_index + i.
    use_visual: forwarded to workers (presumably selects visual
        observations -- TODO confirm against `worker`).
    spaces: unused; kept for interface compatibility.
    """
    self.waiting = False
    self.closed = False
    self.remotes, self.work_remotes = zip(
        *[Pipe() for _ in range(num_env)])
    self.ps = []
    rank = 0
    for (work_remote, remote) in zip(self.work_remotes, self.remotes):
        self.ps += [
            Process(target=worker,
                    args=(work_remote, remote, env_name,
                          max_episode_steps, (start_index + rank),
                          use_visual))
        ]
        rank += 1
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    # Query the first worker for the shared spaces.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    self.viewer = None
    VecEnv.__init__(self, num_env, observation_space, action_space)
def __init__(self, env_fns):
    """Instantiate every environment in-process (no subprocesses).

    env_fns: iterable of zero-argument callables, each building one env.
    """
    self.envs = [make_env() for make_env in env_fns]
    first = self.envs[0]
    VecEnv.__init__(self, len(self.envs),
                    first.observation_space, first.action_space)
    # Per-env elapsed-step counters.
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
def __init__(self, env_specs, auto_reset=True):
    """Build an in-process vectorized env from (env_id, env_fn) pairs.

    Arguments:
    env_specs: iterable of (env_id, env_fn) pairs; ids must be unique.
    auto_reset: whether sub-envs are reset automatically on done.
    """
    self.env_ids, env_fns = zip(*env_specs)
    assert len(set(self.env_ids)) == len(self.env_ids)
    self.auto_reset = auto_reset
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # Work out per-key observation shapes/dtypes (Dict or plain Box).
    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space
    if isinstance(obs_space, spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        for key, box in obs_space.spaces.items():
            assert isinstance(box, spaces.Box)
            shapes[key] = box.shape
            dtypes[key] = box.dtype
            self.keys.append(key)
    else:
        box = obs_space
        assert isinstance(box, spaces.Box)
        self.keys = [None]
        shapes, dtypes = {None: box.shape}, {None: box.dtype}
    # Pre-allocated per-step buffers.
    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # np.bool was removed in NumPy 1.24; builtin bool is the same dtype.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    # Fix: the original assigned self.actions = None twice; once suffices.
    self.actions = None
def __init__(self, env_fns):
    """Run all envs in-process; derive per-agent types and render colors."""
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    # Initializes base class
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # Get info about env type (agent types to distinguish between good
    # guys and bad guys). If the environment does not make this
    # distinction, we set them all to 'agent'.
    if hasattr(env, 'agents') and all(
            [hasattr(a, 'adversary') for a in env.agents]):
        self.agent_types = [
            'adversary' if a.adversary else 'agent' for a in env.agents
        ]
    else:
        self.agent_types = ['agent' for _ in range(env.nagents)]
    # Per-agent colors: taken from the env when available, otherwise
    # sampled evenly from the 'tab20' colormap.
    if hasattr(env, 'agents') and all(
            [hasattr(a, 'color') for a in env.agents]):
        self.agent_colors = [a.color for a in env.agents]
    else:
        # NOTE(review): plt.cm.get_cmap is deprecated (removed in
        # matplotlib >= 3.9); consider matplotlib.colormaps['tab20'].
        cm = plt.cm.get_cmap('tab20')
        self.agent_colors = [
            np.array(cm(float(i) / float(env.nagents))[:3])
            for i in range(env.nagents)
        ]
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
def __init__(self, vec_env, exploration_f, state_encoder=None,
             root_dir=os.environ.get('RETRO_ROOT_DIR'),
             record_dir=os.environ.get('RETRO_RECORD_DIR'),
             save_states=os.environ.get('RETRO_SAVESTATE') == "true"):
    """Wrap `vec_env`, attaching one exploration object per sub-env.

    NOTE(review): the env-var defaults are evaluated once at function
    definition time, not per call.
    """
    VecEnv.__init__(self, vec_env.num_envs, vec_env.observation_space,
                    vec_env.action_space)
    self.env_ids = vec_env.env_ids
    self.vec_env = vec_env
    self.state_encoder = state_encoder
    # Per-env log destinations: files under root_dir, or stdout.
    # NOTE(review): opened log files are never closed in this method --
    # confirm cleanup happens elsewhere (e.g. close()).
    if root_dir is not None:
        self.log_files = [
            open(root_dir + "/" + self.env_ids[env_idx] + "/log", "w")
            for env_idx in range(self.num_envs)
        ]
    else:
        self.log_files = [sys.stdout for env_idx in range(self.num_envs)]
    # Optional per-env directories for emulator save-states.
    if save_states:
        self.save_state_dirs = [
            record_dir + "/" + self.env_ids[env_idx]
            for env_idx in range(self.num_envs)
        ]
    else:
        self.save_state_dirs = [None for env_idx in range(self.num_envs)]
    self.explorations = [
        exploration_f(env_idx,
                      self.env_ids[env_idx],
                      log_file=self.log_files[env_idx],
                      save_state_dir=self.save_state_dirs[env_idx])
        for env_idx in range(self.num_envs)
    ]
    self.actions = None
def __init__(self, env_fns):
    """Build all envs in-process and pre-allocate step buffers.

    Arguments:
    env_fns: iterable of callables functions that build environments
    """
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    obs_space = env.observation_space
    # True when the observation space is composite (has sub-spaces).
    self._has_vector_obs = hasattr(self.observation_space, 'spaces')
    self.keys, shapes, dtypes = obs_space_info(obs_space)
    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # np.bool was removed in NumPy 1.24; builtin bool is equivalent.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
    self.spec = self.envs[0].spec
def __init__(self, env_fns, spaces=None):
    """Spawn worker subprocesses; workers also report a randomization space.

    Arguments:
    env_fns: iterable of callables - functions that create environments
        to run in subprocesses. Need to be cloud-pickleable.
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [
        Process(target=worker,
                args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    # This worker protocol returns a third item: the domain
    # randomization space.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space, randomization_space = self.remotes[
        0].recv()
    self.randomization_space = randomization_space
    self.viewer = None
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def __init__(self, env_fns, spaces=None):
    """ envs: list of gym environments to run in subprocesses """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [
        Process(target=worker,
                args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    for remote in self.work_remotes:
        remote.close()
    # Probe env built in the parent solely to infer observation shape.
    # NOTE(review): this extra env is never closed -- potential resource
    # leak; consider env.close() after the probe.
    env = env_fns[0]()
    o = env.reset()
    n = len(o)  # number of per-agent observations in the joint obs
    dim_o = o[0].shape[-1]  # trailing dimensionality of one observation
    # gym 0.9.4's Box constructor had no dtype parameter.
    if gym.__version__ == "0.9.4":
        obs = gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(n, dim_o))
    else:
        obs = gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(n, dim_o),
                             dtype=np.float32)
    # Hard-coded 5-action discrete space -- TODO confirm this matches
    # the wrapped environments.
    act = gym.spaces.Discrete(5)
    self.nenvs = nenvs
    VecEnv.__init__(self, nenvs, obs, act)
def __init__(self, env_fns, spaces=None):
    """ envs: list of gym environments to run in subprocesses """
    self.waiting = False
    self.closed = False
    # Ray task pool used to collect async step results with a timeout.
    self.task_pool = TaskPool(timeout=10)
    nenvs = len(env_fns)
    self.actors = []
    self.actor_to_i = {}  # maps actor handle -> env index
    remote_actor = ray.remote(Actor)
    for i in range(nenvs):
        actor = remote_actor.remote(i, env_fns[i])
        self.actors.append(actor)
        self.actor_to_i[actor] = i
    observation_space, action_space = ray.get(
        self.actors[0].get_spaces.remote())
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    # Placeholder (obs, reward, done, info) tuples; "bad": True marks
    # them as not produced by a real step.
    self.results = [([0] * OBSERVATION_SPACE, 0, False, {
        "bad": True
    })] * self.num_envs
def __init__(self, env_fns):
    """Build all envs in-process; buffers carry a per-actor axis.

    Arguments:
    env_fns: iterable of callables functions that build environments
    """
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    obs_space = env.observation_space
    # Number of controlled actors per environment.
    self.n_actors = env.n_actors
    self.keys, shapes, dtypes = obs_space_info(obs_space)
    self.buf_obs = {
        k: np.zeros((
            self.num_envs,
            self.n_actors,
        ) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # np.bool was removed in NumPy 1.24; builtin bool is equivalent.
    self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
    self.buf_rews = np.zeros((
        self.num_envs,
        env.n_actors,
    ), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
    self.spec = self.envs[0].spec
def __init__(self, env_fns, spaces=None, context='spawn', in_series=1):
    """
    Arguments:
    env_fns: iterable of callables - functions that create environments
        to run in subprocesses. Need to be cloud-pickleable
    in_series: number of environments to run in series in a single process
        (e.g. when len(env_fns) == 12 and in_series == 3, it will run 4
        processes, each running 3 envs in series)
    """
    self.waiting = False
    self.closed = False
    self.in_series = in_series
    nenvs = len(env_fns)
    assert nenvs % in_series == 0, "Number of envs must be divisible by number of envs to run in series"
    self.nremotes = nenvs // in_series
    # Each worker process owns a chunk of `in_series` env factories.
    env_fns = np.array_split(env_fns, self.nremotes)
    ctx = mp.get_context(context)
    self.remotes, self.work_remotes = zip(
        *[ctx.Pipe() for _ in range(self.nremotes)])
    self.ps = [
        ctx.Process(target=worker,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        # Strip MPI env vars so spawned workers don't try to join the
        # parent's MPI job.
        with clear_mpi_env_vars():
            p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    self.remotes[0].send(('get_spaces_spec', None))
    observation_space, action_space, self.spec, self.n_actors = self.remotes[
        0].recv()
    self.viewer = None
    VecEnv.__init__(self, nenvs, observation_space, action_space)
def __init__(self, env_fns):
    """
    Creates a multiprocess vectorized wrapper for multiple environments

    :param env_fns: ([Gym Environment]) Environments to run in subprocesses
    """
    self.waiting = False
    self.closed = False
    n_envs = len(env_fns)
    # One duplex pipe per env: parent keeps `remotes`, workers get
    # `work_remotes`.
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(n_envs)])
    self.processes = [
        Process(target=_worker,
                args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for process in self.processes:
        process.daemon = True  # if the main process crashes, we should not cause things to hang
        process.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    # Query the first worker for the shared spaces.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def __init__(self, ordinary_env):
    """Present a plain sequence of gym envs through the VecEnv interface."""
    self.env = ordinary_env
    self.nenv = len(ordinary_env)
    template = ordinary_env[0]
    self.action_space = template.action_space
    self.observation_space = template.observation_space
    VecEnv.__init__(self,
                    num_envs=self.nenv,
                    observation_space=template.observation_space,
                    action_space=template.action_space)
def __init__(self, env_fns):
    """In-process multi-agent VecEnv; tags each agent 'adversary' or
    'agent' when the underlying env makes that distinction."""
    self.envs = [make() for make in env_fns]
    first = self.envs[0]
    VecEnv.__init__(self, len(self.envs), first.observation_space,
                    first.action_space)
    if all(hasattr(a, 'adversary') for a in first.agents):
        self.agent_types = [
            'adversary' if a.adversary else 'agent' for a in first.agents
        ]
    else:
        self.agent_types = ['agent' for _ in first.agents]
    # Per-env elapsed-step counters.
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
def __init__(self, app_name, num_envs=2, base=0):
    """Build `num_envs` wrapped copies of `app_name`, port/id offset by `base`."""
    self.name = app_name
    self.envs = [env_wrapper(app_name, base + idx) for idx in range(num_envs)]
    self.num_envs = num_envs
    template = self.envs[0]
    self.observation_space = template.ob_space
    self.action_space = template.ac_space
    VecEnv.__init__(self, num_envs, template.ob_space, template.ac_space)
    # Per-env elapsed-step counters.
    self.ts = np.zeros(num_envs, dtype='int')
    self.actions = None
def reset(self, now_agent_num, now_box_num=None):
    """Reset all remote envs for the given agent (and optional box) count.

    Returns stacked observations and stacked available-action masks.
    """
    if now_box_num is None:
        for remote in self.remotes:
            remote.send((['reset', now_agent_num], None))
    else:
        for remote in self.remotes:
            remote.send((['reset_pb', now_agent_num, now_box_num], None))
    results = [remote.recv() for remote in self.remotes]
    obs, available_actions = zip(*results)
    # Spaces can change with the new agent/box count, so re-query them
    # and re-run the VecEnv initializer.
    # NOTE(review): calling VecEnv.__init__ from reset() is unusual;
    # confirm self.length matches the number of remotes.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, self.length, observation_space, action_space)
    return np.stack(obs), np.stack(available_actions)
def __init__(self, env_fns):
    """In-process vectorized env; step results accumulate in plain lists
    rather than pre-allocated arrays."""
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # Removed an unused local (`obs_spaces`) the original computed and
    # immediately discarded.
    self.buf_obs = []
    self.buf_dones = []
    self.buf_rews = []
    self.buf_infos = []
    self.actions = []
def __init__(self, env_fns):
    """
    Arguments:
    env_fns: iterable of callables functions that build environments
    """
    self.envs = [build() for build in env_fns]
    first = self.envs[0]
    VecEnv.__init__(self, len(self.envs), first.observation_space,
                    first.action_space)
    # One pending transition slot per env.
    self.transitions = [None] * self.num_envs
    self.actions = None
    self.spec = self.envs[0].spec
def __init__(self, env_fns):
    """Vectorized football env: controlled left-team players are tagged
    'agent', controlled right-team players 'adversary'."""
    self.envs = [make() for make in env_fns]
    first = self.envs[0]
    VecEnv.__init__(self, len(self.envs), first.observation_space,
                    first.action_space)
    left = ['agent'] * first.num_controlled_lagents
    right = ['adversary'] * first.num_controlled_ragents
    self.agent_type_left = left
    self.agent_type_right = right
    self.agent_types = left + right
    # Per-env elapsed-step counters.
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
def __init__(self, env_fns, spaces=None, level_selector=None,
             experiment=None, dataset=None):
    """In-process vectorized env with optional level selection and
    imitation-video featurization.

    Arguments:
    env_fns: iterable of callables functions that build environments
    """
    self.envs = [fn() for fn in env_fns]
    self.num_envs = len(env_fns)
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    obs_space = env.observation_space
    self.keys, shapes, dtypes = obs_space_info(obs_space)
    self.buf_obs = {
        k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # np.bool was removed in NumPy 1.24; builtin bool is equivalent.
    self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
    self.spec = self.envs[0].spec
    # NOTE(review): keeps the original 'finsihed' spelling because
    # external code may reference this attribute.
    self.finsihed = [False for _ in range(self.num_envs)]
    self.last_mes = [None for _ in range(self.num_envs)]
    self.level_selector = level_selector
    if USE_IMMITATION_ENV:
        self.featurizer = TDCFeaturizer(92, 92, 84, 84,
                                        feature_vector_size=1024,
                                        learning_rate=0,
                                        experiment_name=experiment)
        self.featurizer.load(dataset)
        video_dataset = generate_dataset(experiment, framerate=60,
                                         width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        # Bug fix: the original referenced an undefined name `nenvs`
        # here (NameError at runtime); use self.num_envs.
        self.checkpoint_indexes = [0] * self.num_envs
        self.rewards = 0
        self.counter = 0
def __init__(self, env_fns):
    """In-process vectorized wrapper for Google Research Football envs."""
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # The football env does not expose `env.agents`, so the usual
    # adversary/agent detection cannot be applied; all 11 players are
    # tagged as plain agents. (Removed the dead triple-quoted string
    # expression that held the old detection code.)
    self.agent_types = ['agent' for _ in range(11)]
    self.ts = np.zeros(len(self.envs), dtype='int')
    self.actions = None
def __init__(self, env_fns):
    """
    env_fns: List of functions that create gym environments to run in
    subprocesses
    """
    self.waiting = False
    self.closed = False
    self.nenvs = len(env_fns)
    # One Ray actor per environment.
    self.actors = [EnvActor.remote(fn) for fn in env_fns]
    observation_space, action_space = ray.get(
        self.actors[0].get_space.remote())
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    # Futures for the in-flight step_async call, if any.
    self.step_futures = None
def __init__(self, env_fns, spaces=None, level_selector=None,
             experiment=None, dataset=None):
    """ envs: list of gym environments to run in subprocesses """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [
        Process(target=worker,
                args=(work_remote, remote, CloudpickleWrapper(env_fn),
                      level_selector))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        print("start processes")  # NOTE(review): printed once per worker
        p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    # Optional imitation setup: featurize a reference video so episodes
    # can be compared against its checkpoints.
    if USE_IMMITATION_ENV:
        self.featurizer = TDCFeaturizer(84, 84, 84, 84,
                                        feature_vector_size=1024,
                                        learning_rate=0,
                                        experiment_name=experiment)
        self.featurizer.load(dataset)
        video_dataset = generate_dataset(experiment, framerate=60,
                                         width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = [0] * nenvs
        self.rewards = 0
        self.counter = 0
def __init__(self, env_fns):
    """Build envs in-process; infer spaces by resetting the first env.

    NOTE(review): VecEnv is initialized with nenvs = 1 even though
    len(env_fns) envs are constructed -- confirm this is intentional.
    """
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    o = env.reset()
    n = len(o)  # number of per-agent observations in the joint obs
    dim_o = o[0].shape[-1]  # trailing dimensionality of one observation
    # gym 0.9.4's Box constructor had no dtype parameter.
    if gym.__version__ == "0.9.4":
        obs = gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(n, dim_o))
    else:
        obs = gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(n, dim_o),
                             dtype=np.float32)
    # Hard-coded 5-action discrete space -- TODO confirm this matches
    # the wrapped environments.
    act = gym.spaces.Discrete(5)
    nenvs = 1
    VecEnv.__init__(self, nenvs, obs, act)
def new_starts_obs_pb(self, starts, now_agent_num, now_box_num,
                      now_num_processes):
    """Send per-process start states and collect resulting observations.

    Only the first `now_num_processes` remotes participate; the rest are
    skipped.
    """
    i = 0
    results = []
    for remote in self.remotes:
        if i < now_num_processes:
            tmp_list = ['new_starts_obs_pb', now_agent_num, now_box_num,
                        starts[i]]
            remote.send((tmp_list, None))
        i += 1
    i = 0
    for remote in self.remotes:
        if i < now_num_processes:
            results.append(remote.recv())
        i += 1
    # Spaces can depend on agent/box counts, so re-query them and re-run
    # the VecEnv initializer.
    # NOTE(review): re-calling VecEnv.__init__ outside __init__ is
    # unusual; confirm self.length matches the number of remotes.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, self.length, observation_space, action_space)
    return np.stack(results)
def __init__(self, env_fns):
    """In-process vectorized env with pre-allocated per-component buffers."""
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # Normalize to a tuple of component spaces (Tuple space or a single
    # space treated as a one-element tuple).
    obs_spaces = self.observation_space.spaces if isinstance(
        self.observation_space,
        gym.spaces.Tuple) else (self.observation_space, )
    self.buf_obs = [
        np.zeros((self.num_envs, ) + tuple(s.shape), s.dtype)
        for s in obs_spaces
    ]
    # np.bool was removed in NumPy 1.24; builtin bool is equivalent.
    self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
def __init__(self, env_fns, spaces=None):
    """ envs: list of gym environments to run in subprocesses """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    # One duplex pipe per env: parent keeps `remotes`, workers get
    # `work_remotes`.
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [Process(target=worker,
                       args=(work_remote, remote, CloudpickleWrapper(env_fn)))
               for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                        self.remotes, env_fns)]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    # Query the first worker for the shared spaces.
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def __init__(self, env_id): print ("**** ", env_id, platform.system()) # env = UnityEnv(env_id, multiagent=True) env_id = "hopper" # env_id = "walker" if platform.system() == 'Windows': env_path = os.path.join('envs', env_id+'-x16', 'Unity Environment.exe') elif platform.system() == 'Darwin': # MacOS env_path = os.path.join('envs', env_id+'-x16') elif platform.system() == 'Linux': env_path = os.path.join('envs', env_id+'-x16') print ("**** Override", env_path, env_id) env = UnityEnv(env_path, multiagent=True) self.env = env env.num_envs = env.number_agents VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space) # obs_space = env.observation_space # spec = env.spec # self.keys, shapes, dtypes = obs_space_info(obs_space) # self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys } # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool) # self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32) self.buf_infos = [{} for _ in range(self.num_envs)] # Fake Monitor self.tstart = time.time() self.results_writer = ResultsWriter( "filename", header={"t_start": time.time(), 'env_id' : env.spec and env.spec.id}, extra_keys=() + () ) self.reset_keywords = () self.info_keywords = () self.allow_early_resets = True self.rewards = None self.needs_reset = True self.episode_rewards = [] self.episode_lengths = [] self.episode_times = [] self.total_steps = 0 self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
def __init__(self, env_fns, h5_path=None, spaces=None, context='spawn'):
    """
    Arguments:
    env_fns: iterable of callables - functions that create environments
        to run in subprocesses. Need to be cloud-pickleable.
    h5_path: path to a HighD HDF5 file providing 'initial_velocities'
        and 'x_accelerations' datasets.
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    ctx = mp.get_context(context)
    self.remotes, self.work_remotes = zip(
        *[ctx.Pipe() for _ in range(nenvs)])
    self.ps = [
        ctx.Process(target=worker_acc,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        for (work_remote, remote, env_fn) in zip(self.work_remotes,
                                                 self.remotes, env_fns)
    ]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        # Strip MPI env vars so spawned workers don't join the MPI job.
        with clear_mpi_env_vars():
            p.start()
    # Parent closes its copies of the worker-side pipe ends.
    for remote in self.work_remotes:
        remote.close()
    self.remotes[0].send(('get_spaces_spec', None))
    observation_space, action_space, self.spec = self.remotes[0].recv()
    self.viewer = None
    # load HighD data
    # NOTE(review): the HDF5 file stays open for the object's lifetime;
    # confirm it is closed elsewhere (e.g. in close()).
    self.highd_f = h5py.File(h5_path, "r")
    self.highd_vels = self.highd_f['initial_velocities']
    self.highd_accs = self.highd_f['x_accelerations']
    self.staliro_trace_queue = None
    # self.obs_pos = None
    # self.obs_vel = None
    # self.obs_driver = None
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def __init__(self, env_fns):
    """In-process vectorized env supporting Box, Dict, and hierarchical
    (tokens, Box) Tuple observation spaces."""
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space,
                    env.action_space)
    # `hier` marks the hierarchical (Tuple) observation case.
    self.hier = False
    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space
    if isinstance(obs_space, spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        for key, box in obs_space.spaces.items():
            assert isinstance(box, spaces.Box)
            shapes[key] = box.shape
            dtypes[key] = box.dtype
            self.keys.append(key)
    elif isinstance(obs_space, spaces.Tuple):
        # Assumes exactly two components: (token space, Box).
        tokens, box = obs_space.spaces
        self.buf_tokens = {
            None: np.zeros((self.num_envs, ) + (tokens.n, ))
        }
        self.hier = True
        self.keys = [None]
        shapes, dtypes = {None: box.shape}, {None: box.dtype}
    else:
        box = obs_space
        assert isinstance(box, spaces.Box)
        self.keys = [None]
        shapes, dtypes = {None: box.shape}, {None: box.dtype}
    self.buf_obs = {
        k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    self.final_obs = {
        k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # np.bool was removed in NumPy 1.24; builtin bool is equivalent.
    self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
def __init__(self, env_fns, spaces=None):
    """ envs: list of gym environments to run in subprocesses """
    self.waiting = False
    self.closed = False
    # Ray task pool used to gather step results with a timeout.
    self.task_pool = TaskPool(timeout=10)
    nenvs = len(env_fns)
    self.actors = []
    self.actor_to_i = {}  # maps actor handle -> env index
    remote_actor = ray.remote(Actor)
    for i in range(nenvs):
        actor = remote_actor.remote(i, env_fns[i])
        self.actors.append(actor)
        self.actor_to_i[actor] = i
    observation_space, action_space = ray.get(self.actors[0].get_spaces.remote())
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    # Placeholder (obs, reward, done, info) tuples; "bad": True marks
    # them as not produced by a real step.
    self.results = [([0] * OBSERVATION_SPACE, 0, False, {"bad": True})] * self.num_envs