Esempio n. 1
0
 def __init__(self, env_specs, auto_reset=True):
     """Serial vectorized env built from (env_id, env_fn) pairs.

     env_specs: iterable of (env_id, callable) pairs; ids must be unique.
     auto_reset: stored flag; presumably controls automatic reset on
         episode end in step() — confirm against the stepping code.
     """
     self.env_ids, env_fns = zip(*env_specs)
     # Duplicate env ids would make id-based lookups ambiguous.
     assert len(set(self.env_ids)) == len(self.env_ids)
     self.auto_reset = auto_reset
     self.actions = None
     self.envs = [fn() for fn in env_fns]
     env = self.envs[0]
     VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
     shapes, dtypes = {}, {}
     self.keys = []
     obs_space = env.observation_space
     if isinstance(obs_space, spaces.Dict):
         # Dict spaces: one buffer per key, all sub-spaces must be Boxes.
         assert isinstance(obs_space.spaces, OrderedDict)
         for key, box in obs_space.spaces.items():
             assert isinstance(box, spaces.Box)
             shapes[key] = box.shape
             dtypes[key] = box.dtype
             self.keys.append(key)
     else:
         # Plain Box space: store it under the None key.
         box = obs_space
         assert isinstance(box, spaces.Box)
         self.keys = [None]
         shapes, dtypes = { None: box.shape }, { None: box.dtype }
     self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
     # np.bool was removed in NumPy 1.24; the builtin bool is the supported spelling.
     self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
     self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
     self.buf_infos = [{} for _ in range(self.num_envs)]
Esempio n. 2
0
    def __init__(self,
                 num_env,
                 env_name,
                 max_episode_steps,
                 start_index,
                 use_visual,
                 spaces=None):
        """
        Arguments:

        num_env: number of worker processes (one env per process)
        env_name: forwarded to each worker's env constructor
        max_episode_steps: episode cap forwarded to each worker
        start_index: base rank; worker i receives rank start_index + i
        use_visual: forwarded to the workers (semantics defined by `worker`)
        spaces: unused here; spaces are queried from worker 0 instead
        """
        self.waiting = False
        self.closed = False
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(
            *[Pipe() for _ in range(num_env)])
        self.ps = []
        rank = 0
        for (work_remote, remote) in zip(self.work_remotes, self.remotes):
            self.ps += [
                Process(target=worker,
                        args=(work_remote, remote, env_name, max_episode_steps,
                              (start_index + rank), use_visual))
            ]
            rank += 1
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            p.start()
        # Child ends are owned by the workers now; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        # Ask the first worker for the shared (observation_space, action_space).
        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        self.viewer = None
        VecEnv.__init__(self, num_env, observation_space, action_space)
Esempio n. 3
0
    def __init__(self, env_fns):
        """
        Serial (single-process) vectorized env.

        Arguments:

        env_fns: iterable of callables      functions that build environments
        """
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)
        obs_space = env.observation_space
        # True when the observation space is composite (exposes .spaces).
        self._has_vector_obs = hasattr(self.observation_space, 'spaces')
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        # One zeroed buffer per observation key, leading axis = num_envs.
        self.buf_obs = {
            k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
            for k in self.keys
        }
        # np.bool was removed in NumPy 1.24; builtin bool is the supported spelling.
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.spec = self.envs[0].spec
 def __init__(self, env_fns):
     """Instantiate every env eagerly and initialise the VecEnv base class."""
     self.envs = list(factory() for factory in env_fns)
     first = self.envs[0]
     VecEnv.__init__(self, len(env_fns),
                     first.observation_space, first.action_space)
     # Per-env step counters, all starting at zero.
     self.ts = np.zeros(len(self.envs), dtype='int')
     self.actions = None
    def __init__(self, env_fns):
        """
        Creates a multiprocess vectorized wrapper for multiple environments

        :param env_fns: ([Gym Environment]) Environments to run in subprocesses
        """
        self.waiting = False
        self.closed = False
        n_envs = len(env_fns)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(n_envs)])
        # CloudpickleWrapper lets the env factory cross the process boundary
        # even when it is not plain-pickleable.
        self.processes = [
            Process(target=_worker,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote,
                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
        ]
        for process in self.processes:
            process.daemon = True  # if the main process crashes, we should not cause things to hang
            process.start()
        # Child ends now belong to the workers; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        # Query the first worker for the shared spaces.
        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Esempio n. 6
0
    def __init__(self, env_fns):
        """Build each env serially and derive per-agent types and colors."""
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]

        # Initializes base class
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)

        # Get info about env type (agent types to distinguish between good guys and bad guys)
        # If the environment does not make this distinction, we set them all to 'agent'
        if hasattr(env, 'agents') and all(
            [hasattr(a, 'adversary') for a in env.agents]):
            self.agent_types = [
                'adversary' if a.adversary else 'agent' for a in env.agents
            ]
        else:
            self.agent_types = ['agent' for _ in range(env.nagents)]

        # Per-agent colors: taken from the env when provided, otherwise
        # sampled evenly from the 'tab20' colormap (RGB only, alpha dropped).
        if hasattr(env, 'agents') and all(
            [hasattr(a, 'color') for a in env.agents]):
            self.agent_colors = [a.color for a in env.agents]
        else:
            cm = plt.cm.get_cmap('tab20')
            self.agent_colors = [
                np.array(cm(float(i) / float(env.nagents))[:3])
                for i in range(env.nagents)
            ]

        # Per-env step counters and the pending action batch.
        self.ts = np.zeros(len(self.envs), dtype='int')
        self.actions = None
    def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses

        Each env is hosted by its own Ray actor; `spaces` is unused here
        because the spaces are fetched from the first actor.
        """
        self.waiting = False
        self.closed = False
        # Bounds how long we wait on outstanding actor results.
        self.task_pool = TaskPool(timeout=10)

        nenvs = len(env_fns)

        self.actors = []
        self.actor_to_i = {}  # maps actor handle -> env index
        remote_actor = ray.remote(Actor)
        for i in range(nenvs):
            actor = remote_actor.remote(i, env_fns[i])
            self.actors.append(actor)
            self.actor_to_i[actor] = i

        observation_space, action_space = ray.get(
            self.actors[0].get_spaces.remote())
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        # Placeholder transitions flagged {"bad": True} until real steps arrive.
        # NOTE(review): OBSERVATION_SPACE is presumably a module-level obs
        # size constant — confirm.
        self.results = [([0] * OBSERVATION_SPACE, 0, False, {
            "bad": True
        })] * self.num_envs
Esempio n. 8
0
    def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses

        `spaces` is accepted for API compatibility but unused; spaces and
        per-agent types are queried from the first worker instead.
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [
            Process(target=worker,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote,
                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
        ]
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            p.start()
        # Children own the work ends now; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        # Multi-agent extension: the worker also reports per-agent types.
        self.remotes[0].send(('get_agent_types', None))
        self.agent_types = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Esempio n. 9
0
    def __init__(self, env_fns):
        """
        Arguments:

        env_fns: iterable of callables      functions that build environments
            (each env exposes n_actors; rewards and observations carry a
            per-actor axis)
        """
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)
        obs_space = env.observation_space
        # Number of actors per environment (multi-actor envs).
        self.n_actors = env.n_actors
        self.keys, shapes, dtypes = obs_space_info(obs_space)
        # Observation buffers carry an extra per-actor axis.
        self.buf_obs = {
            k: np.zeros((
                self.num_envs,
                self.n_actors,
            ) + tuple(shapes[k]),
                        dtype=dtypes[k])
            for k in self.keys
        }
        # np.bool was removed in NumPy 1.24; builtin bool is the supported spelling.
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        # Rewards are per-env, per-actor.
        self.buf_rews = np.zeros((
            self.num_envs,
            env.n_actors,
        ),
                                 dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.spec = self.envs[0].spec
    def __init__(self, env_fns, spaces=None):
        """
        Arguments:

        env_fns: iterable of callables -  functions that create environments to run in subprocesses. Need to be cloud-pickleable
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [
            Process(target=worker,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote,
                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
        ]
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            p.start()
        # Children own the work ends; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        # This worker protocol additionally returns a domain-randomization space.
        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space, randomization_space = self.remotes[
            0].recv()
        self.randomization_space = randomization_space
        self.viewer = None
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Esempio n. 11
0
    def __init__(self, env_fns, spaces=None, context='spawn', in_series=1):
        """
        Arguments:

        env_fns: iterable of callables -  functions that create environments to run in subprocesses. Need to be cloud-pickleable
        in_series: number of environments to run in series in a single process
        (e.g. when len(env_fns) == 12 and in_series == 3, it will run 4 processes, each running 3 envs in series)
        """
        self.waiting = False
        self.closed = False
        self.in_series = in_series
        nenvs = len(env_fns)
        assert nenvs % in_series == 0, "Number of envs must be divisible by number of envs to run in series"
        self.nremotes = nenvs // in_series
        # Split the factories into nremotes groups; each worker runs one group.
        env_fns = np.array_split(env_fns, self.nremotes)
        ctx = mp.get_context(context)
        self.remotes, self.work_remotes = zip(*[ctx.Pipe() for _ in range(self.nremotes)])
        self.ps = [ctx.Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
                   for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            p.start()
        # Children own the work ends; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces_spec', None))
        # The reply is wrapped (CloudpickleWrapper-style); `.x` unwraps the payload.
        observation_space, action_space, self.spec = self.remotes[0].recv().x
        self.viewer = None
        VecEnv.__init__(self, nenvs, observation_space, action_space)
Esempio n. 12
0
 def __init__(self, ordinary_env):
     """Wrap a sequence of already-built gym envs as a VecEnv."""
     self.env = ordinary_env
     self.nenv = len(ordinary_env)
     template = ordinary_env[0]
     self.action_space = template.action_space
     self.observation_space = template.observation_space
     VecEnv.__init__(self,
                     num_envs=self.nenv,
                     observation_space=template.observation_space,
                     action_space=template.action_space)
Esempio n. 13
0
 def __init__(self, env_fns):
     """Build each env and record per-agent types ('agent' vs 'adversary')."""
     self.envs = [make() for make in env_fns]
     first = self.envs[0]
     VecEnv.__init__(self, len(env_fns), first.observation_space, first.action_space)
     if all(hasattr(a, 'adversary') for a in first.agents):
         # The env flags adversarial agents explicitly.
         self.agent_types = [
             'adversary' if a.adversary else 'agent' for a in first.agents
         ]
     else:
         # No distinction available: everyone is a plain agent.
         self.agent_types = ['agent' for _ in first.agents]
     self.ts = np.zeros(len(self.envs), dtype='int')
     self.actions = None
Esempio n. 14
0
 def __init__(self, app_name, num_envs=2, base=0):
     """Create `num_envs` wrapped instances of the named app, indices offset by `base`."""
     self.name = app_name
     self.envs = [env_wrapper(app_name, base + i) for i in range(num_envs)]
     self.num_envs = num_envs

     template = self.envs[0]
     self.observation_space = template.ob_space
     self.action_space = template.ac_space
     VecEnv.__init__(self, num_envs, template.ob_space, template.ac_space)

     # One step counter per env, plus a slot for the pending action batch.
     self.ts = np.zeros(num_envs, dtype='int')
     self.actions = None
Esempio n. 15
0
    def __init__(self, env_fns):
        """
        Arguments:
        env_fns: iterable of callables functions that build environments
        """
        self.envs = [factory() for factory in env_fns]
        proto = self.envs[0]
        VecEnv.__init__(self, len(env_fns),
                        proto.observation_space, proto.action_space)

        # One slot per env for its most recent transition (None until populated).
        self.transitions = [None] * self.num_envs
        self.actions = None
        self.spec = self.envs[0].spec
Esempio n. 16
0
 def reset(self, now_agent_num, now_box_num=None):
     """Reset every worker for the given agent (and optional box) count.

     Sends 'reset' (or 'reset_pb' when now_box_num is given) to all remotes,
     then re-runs VecEnv.__init__ because the spaces can change with the
     new agent/box configuration.

     Returns (stacked observations, stacked available-action arrays).
     """
     if now_box_num is None:
         for remote in self.remotes:
             remote.send((['reset',now_agent_num], None))
     else:
         for remote in self.remotes:
             remote.send((['reset_pb',now_agent_num, now_box_num], None))
     results = [remote.recv() for remote in self.remotes]
     obs, available_actions = zip(*results)
     # Spaces may have changed after the reset; refresh them from worker 0.
     self.remotes[0].send(('get_spaces', None))
     observation_space, action_space = self.remotes[0].recv()
     VecEnv.__init__(self, self.length, observation_space, action_space)
     return np.stack(obs), np.stack(available_actions)
Esempio n. 17
0
 def __init__(self, env_fns):
     """Build each env serially and start with empty step buffers.

     env_fns: iterable of zero-argument callables returning gym envs.
     """
     self.envs = [fn() for fn in env_fns]
     env = self.envs[0]
     VecEnv.__init__(self, len(env_fns), env.observation_space,
                     env.action_space)
     # Removed dead code: a local `obs_spaces` was computed from the
     # observation space but never used in this constructor.
     # Buffers start empty; presumably appended to as envs are stepped.
     self.buf_obs = []
     self.buf_dones = []
     self.buf_rews = []
     self.buf_infos = []
     self.actions = []
Esempio n. 18
0
 def __init__(self, env_fns):
     """Create envs; left-team controlled players are 'agent', right-team 'adversary'."""
     self.envs = [build() for build in env_fns]
     first = self.envs[0]
     VecEnv.__init__(self, len(env_fns), first.observation_space,
                     first.action_space)
     left = ['agent'] * first.num_controlled_lagents
     right = ['adversary'] * first.num_controlled_ragents
     self.agent_type_left = left
     self.agent_type_right = right
     # Combined roster, left team first.
     self.agent_types = left + right
     self.ts = np.zeros(len(self.envs), dtype='int')
     self.actions = None
Esempio n. 19
0
    def __init__(self,
                 env_fns,
                 spaces=None,
                 level_selector=None,
                 experiment=None,
                 dataset=None):
        """
        Arguments:
        env_fns: iterable of callables      functions that build environments
        spaces: unused; spaces are taken from the first built env
        level_selector: stored for later use by the stepping code
        experiment, dataset: identify the imitation featurizer/dataset to
            load (only used when USE_IMMITATION_ENV is set)
        """
        self.envs = [fn() for fn in env_fns]
        self.num_envs = len(env_fns)
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)

        obs_space = env.observation_space
        self.keys, shapes, dtypes = obs_space_info(obs_space)

        self.buf_obs = {
            k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
            for k in self.keys
        }
        # np.bool was removed in NumPy 1.24; builtin bool is the supported spelling.
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
        self.spec = self.envs[0].spec

        # NOTE: the attribute keeps its historical misspelling ("finsihed")
        # because external code may already read it by this name.
        self.finsihed = [False for _ in range(self.num_envs)]
        self.last_mes = [None for _ in range(self.num_envs)]
        self.level_selector = level_selector
        if USE_IMMITATION_ENV:
            self.featurizer = TDCFeaturizer(92,
                                            92,
                                            84,
                                            84,
                                            feature_vector_size=1024,
                                            learning_rate=0,
                                            experiment_name=experiment)
            self.featurizer.load(dataset)
            video_dataset = generate_dataset(experiment,
                                             framerate=60,
                                             width=84,
                                             height=84)[0]
            self.featurized_dataset = self.featurizer.featurize(video_dataset)
            # Bug fix: `nenvs` was never defined in this scope (NameError);
            # self.num_envs holds the intended count.
            self.checkpoint_indexes = [0] * self.num_envs

            self.rewards = 0
            self.counter = 0
Esempio n. 20
0
 def __init__(self, env_fns):
     """Build envs and assign a fixed roster of eleven 'agent' types.

     (** EDITED **) The Google Football env does not expose `env.agents`,
     so the adversary/agent probe used by similar wrappers cannot be
     applied here; the agent types are set directly instead.
     """
     self.envs = [make() for make in env_fns]
     first = self.envs[0]
     VecEnv.__init__(self, len(env_fns), first.observation_space,
                     first.action_space)
     # Hard-coded: eleven controlled players, all plain agents.
     self.agent_types = ['agent'] * 11
     self.ts = np.zeros(len(self.envs), dtype='int')
     self.actions = None
Esempio n. 21
0
    def __init__(self, env_fns):
        """
        env_fns: List of functions that create gym environments to run in subprocesses
        """
        self.waiting = False
        self.closed = False
        self.nenvs = len(env_fns)

        # One Ray actor per environment.
        self.actors = [EnvActor.remote(fn) for fn in env_fns]

        # Spaces are assumed identical across envs, so ask only the first actor.
        observation_space, action_space = ray.get(
            self.actors[0].get_space.remote())
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        # Futures for an in-flight asynchronous step; None when idle.
        self.step_futures = None
Esempio n. 22
0
    def __init__(self,
                 env_fns,
                 spaces=None,
                 level_selector=None,
                 experiment=None,
                 dataset=None):
        """
        envs: list of gym environments to run in subprocesses

        level_selector is forwarded to every worker; experiment/dataset are
        only used when USE_IMMITATION_ENV is set, to load a TDC featurizer
        and a featurized reference video dataset.
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [
            Process(target=worker,
                    args=(work_remote, remote, CloudpickleWrapper(env_fn),
                          level_selector))
            for (work_remote, remote,
                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
        ]
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            # NOTE(review): this prints once per worker, not once overall.
            print("start processes")
            p.start()
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
        if USE_IMMITATION_ENV:
            self.featurizer = TDCFeaturizer(84,
                                            84,
                                            84,
                                            84,
                                            feature_vector_size=1024,
                                            learning_rate=0,
                                            experiment_name=experiment)
            self.featurizer.load(dataset)
            video_dataset = generate_dataset(experiment,
                                             framerate=60,
                                             width=84,
                                             height=84)[0]
            self.featurized_dataset = self.featurizer.featurize(video_dataset)
            # Index of the next reference checkpoint per env.
            self.checkpoint_indexes = [0] * nenvs

            self.rewards = 0
            self.counter = 0
Esempio n. 23
0
 def __init__(self, env_fns):
     """Derive the obs space from one reset of the first env; actions are Discrete(5)."""
     self.envs = [fn() for fn in env_fns]
     first = self.envs[0]
     initial_obs = first.reset()
     n_agents = len(initial_obs)
     obs_dim = initial_obs[0].shape[-1]
     # gym 0.9.4's Box constructor has no dtype parameter, so branch on the version.
     if gym.__version__ == "0.9.4":
         obs_space = gym.spaces.Box(low=-np.inf, high=+np.inf,
                                    shape=(n_agents, obs_dim))
     else:
         obs_space = gym.spaces.Box(low=-np.inf, high=+np.inf,
                                    shape=(n_agents, obs_dim),
                                    dtype=np.float32)
     # A single logical env is exposed, regardless of len(env_fns).
     VecEnv.__init__(self, 1, obs_space, gym.spaces.Discrete(5))
Esempio n. 24
0
 def new_starts_obs_pb(self, starts, now_agent_num, now_box_num, now_num_processes):
     """Seed the first `now_num_processes` workers with given start states.

     starts: per-process start configurations, indexed 0..now_num_processes-1.
     Returns the stacked observations returned by those workers.
     """
     i = 0
     results = []
     # First pass: send one start configuration to each active worker.
     for remote in self.remotes:
         if i < now_num_processes:
             tmp_list = ['new_starts_obs_pb', now_agent_num, now_box_num, starts[i]]
             remote.send((tmp_list, None))
             i += 1
     i = 0
     # Second pass: collect replies in the same worker order.
     for remote in self.remotes:
         if i < now_num_processes:
             results.append(remote.recv())
             i += 1
     # Spaces may depend on the new configuration; refresh from worker 0.
     self.remotes[0].send(('get_spaces', None))
     observation_space, action_space = self.remotes[0].recv()
     VecEnv.__init__(self, self.length, observation_space, action_space)
     return np.stack(results)
Esempio n. 25
0
    def __init__(self, env_fns):
        """Serial VecEnv whose buffers are per-(sub)space lists, not dicts.

        env_fns: iterable of zero-argument callables building gym envs.
        """
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space,
                        env.action_space)

        # Tuple spaces get one buffer per sub-space; anything else is wrapped
        # in a 1-tuple so the buffer layout stays uniform.
        obs_spaces = self.observation_space.spaces if isinstance(
            self.observation_space,
            gym.spaces.Tuple) else (self.observation_space, )
        self.buf_obs = [
            np.zeros((self.num_envs, ) + tuple(s.shape), s.dtype)
            for s in obs_spaces
        ]
        # np.bool was removed in NumPy 1.24; builtin bool is the supported spelling.
        self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
        self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        self.actions = None
    def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses

        `spaces` is accepted for API compatibility but unused; the spaces
        are queried from the first worker instead.
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
        for p in self.ps:
            p.daemon = True # if the main process crashes, we should not cause things to hang
            p.start()
        # Children own the work ends; close the parent's copies.
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Esempio n. 27
0
    def __init__(self, env_id):
        """Wrap a multi-agent Unity env as a VecEnv (one slot per Unity agent).

        NOTE(review): the incoming env_id is printed, then overridden with a
        hard-coded "hopper" build below — confirm this override is intentional.
        """
        print ("**** ", env_id, platform.system())
        # env = UnityEnv(env_id, multiagent=True)
        env_id = "hopper"
        # env_id = "walker"
        # Pick the platform-specific Unity build under envs/<id>-x16.
        if platform.system() == 'Windows':
            env_path = os.path.join('envs', env_id+'-x16', 'Unity Environment.exe')
        elif platform.system() == 'Darwin': # MacOS
            env_path = os.path.join('envs', env_id+'-x16')
        elif platform.system() == 'Linux': 
            env_path = os.path.join('envs', env_id+'-x16')
        print ("**** Override", env_path, env_id)
        env = UnityEnv(env_path, multiagent=True)
        self.env = env
        # Each Unity agent is exposed as one vectorized env slot.
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)
        # obs_space = env.observation_space
        # spec = env.spec

        # self.keys, shapes, dtypes = obs_space_info(obs_space)
        # self.buf_obs = { k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k]) for k in self.keys }
        # self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
        # self.buf_rews  = np.zeros((self.num_envs,), dtype=np.float32)
        self.buf_infos = [{} for _ in range(self.num_envs)]
        # Fake Monitor: replicate the attributes bench.Monitor would set up.
        self.tstart = time.time()
        self.results_writer = ResultsWriter(
            "filename",
            header={"t_start": time.time(), 'env_id' : env.spec and env.spec.id},
            extra_keys=() + ()
        )
        self.reset_keywords = ()
        self.info_keywords = ()
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {} # extra info about the current episode, that was passed in during reset()
Esempio n. 28
0
 def __init__(self, env_fns):
     """Serial VecEnv supporting Dict, Tuple (hierarchical), and Box obs spaces.

     env_fns: iterable of zero-argument callables building environments.
     """
     self.envs = [fn() for fn in env_fns]
     env = self.envs[0]
     VecEnv.__init__(self, len(env_fns), env.observation_space,
                     env.action_space)
     # `hier` is set when the obs space is a (tokens, box) Tuple.
     self.hier = False
     shapes, dtypes = {}, {}
     self.keys = []
     obs_space = env.observation_space
     if isinstance(obs_space, spaces.Dict):
         assert isinstance(obs_space.spaces, OrderedDict)
         for key, box in obs_space.spaces.items():
             assert isinstance(box, spaces.Box)
             shapes[key] = box.shape
             dtypes[key] = box.dtype
             self.keys.append(key)
     elif isinstance(obs_space, spaces.Tuple):
         # Hierarchical case: first element is a discrete token space,
         # second is the Box observation proper.
         tokens, box = obs_space.spaces
         self.buf_tokens = {
             None: np.zeros((self.num_envs, ) + (tokens.n, ))
         }
         self.hier = True
         self.keys = [None]
         shapes, dtypes = {None: box.shape}, {None: box.dtype}
     else:
         box = obs_space
         assert isinstance(box, spaces.Box)
         self.keys = [None]
         shapes, dtypes = {None: box.shape}, {None: box.dtype}
     self.buf_obs = {
         k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
         for k in self.keys
     }
     # Separate copy; presumably holds the terminal observation of an
     # episode — confirm against the stepping code.
     self.final_obs = {
         k: np.zeros((self.num_envs, ) + tuple(shapes[k]), dtype=dtypes[k])
         for k in self.keys
     }
     # np.bool was removed in NumPy 1.24; builtin bool is the supported spelling.
     self.buf_dones = np.zeros((self.num_envs, ), dtype=bool)
     self.buf_rews = np.zeros((self.num_envs, ), dtype=np.float32)
     self.buf_infos = [{} for _ in range(self.num_envs)]
     self.actions = None
Esempio n. 29
0
    def __init__(self, env_fns, h5_path=None, spaces=None, context='spawn'):
        """
        Arguments:

        env_fns: iterable of callables -  functions that create environments to run in subprocesses. Need to be cloud-pickleable
        h5_path: path to a HighD HDF5 file providing 'initial_velocities'
            and 'x_accelerations' datasets
        context: multiprocessing start method
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        ctx = mp.get_context(context)
        # One duplex pipe per env: (parent end, child end).
        self.remotes, self.work_remotes = zip(
            *[ctx.Pipe() for _ in range(nenvs)])
        self.ps = [
            ctx.Process(target=worker_acc,
                        args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote,
                 env_fn) in zip(self.work_remotes, self.remotes, env_fns)
        ]
        for p in self.ps:
            p.daemon = True  # if the main process crashes, we should not cause things to hang
            # Strip MPI env vars so children don't think they are MPI ranks.
            with clear_mpi_env_vars():
                p.start()
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces_spec', None))
        observation_space, action_space, self.spec = self.remotes[0].recv()
        self.viewer = None

        # load HighD data
        self.highd_f = h5py.File(h5_path, "r")
        self.highd_vels = self.highd_f['initial_velocities']
        self.highd_accs = self.highd_f['x_accelerations']

        # Queue for staliro traces; None until one is installed.
        self.staliro_trace_queue = None

        # self.obs_pos = None
        # self.obs_vel = None
        # self.obs_driver = None

        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Esempio n. 30
0
    def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses

        `spaces` is accepted for API compatibility but unused.
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        # Pipe() function returns a pair of connection objects connected by a pipe which by default is duplex (two-way)
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) # Create process objects
            for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
        for p in self.ps:
            p.daemon = True # if the main process crashes, we should not cause things to hang (Process's daemon flag must be set
                            # before the process starts)
            p.start() # Start the process
        for remote in self.work_remotes:
            remote.close()

        # Query the first worker for the shared spaces.
        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
 def __init__(self, env_fns, spaces=None, context='spawn'):
     """
     If you don't specify observation_space, we'll have to create a dummy
     environment to get it.

     Observations are exchanged through shared-memory arrays (one per env
     and obs key) instead of being pickled through the pipes.
     """
     ctx = mp.get_context(context)
     if spaces:
         observation_space, action_space = spaces
     else:
         logger.log('Creating dummy env object to get spaces')
         with logger.scoped_configure(format_strs=[]):
             dummy = env_fns[0]()
             observation_space, action_space = dummy.observation_space, dummy.action_space
             dummy.close()
             del dummy
     VecEnv.__init__(self, len(env_fns), observation_space, action_space)
     self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(
         observation_space)
     # One flat shared-memory array per (env, obs key); workers write into
     # these so observations never travel through the pipes.
     self.obs_bufs = [{
         k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type],
                      int(np.prod(self.obs_shapes[k])))
         for k in self.obs_keys
     } for _ in env_fns]
     self.parent_pipes = []
     self.procs = []
     # Strip MPI env vars so the children don't think they are MPI ranks.
     with clear_mpi_env_vars():
         for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
             wrapped_fn = CloudpickleWrapper(env_fn)
             parent_pipe, child_pipe = ctx.Pipe()
             proc = ctx.Process(target=_subproc_worker,
                                args=(child_pipe, parent_pipe, wrapped_fn,
                                      obs_buf, self.obs_shapes,
                                      self.obs_dtypes, self.obs_keys))
             proc.daemon = True
             self.procs.append(proc)
             self.parent_pipes.append(parent_pipe)
             proc.start()
             # The worker owns its pipe end now; close the parent's copy.
             child_pipe.close()
     self.waiting_step = False
     self.viewer = None
    def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses

        One Ray actor is created per environment; `spaces` is unused.
        """
        self.waiting = False
        self.closed = False
        # Bounds how long we wait on outstanding actor results.
        self.task_pool = TaskPool(timeout=10)

        nenvs = len(env_fns)

        self.actors = []
        self.actor_to_i = {}  # maps actor handle -> env index
        remote_actor = ray.remote(Actor)
        for i in range(nenvs):
            actor = remote_actor.remote(i, env_fns[i])
            self.actors.append(actor)
            self.actor_to_i[actor] = i

        observation_space, action_space = ray.get(self.actors[0].get_spaces.remote())
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        # Placeholder transitions flagged {"bad": True} until real steps arrive.
        # NOTE(review): OBSERVATION_SPACE is presumably a module-level obs
        # size constant — confirm.
        self.results = [([0] * OBSERVATION_SPACE, 0, False, {"bad": True})] * self.num_envs