예제 #1
0
    def __init__(self, env_id, n_agents):
        """Start a multi-agent Unity environment and expose it as a VecEnv.

        The executable path is resolved with ``UnityVecEnv.GetFilePath`` and
        the environment is opened with ``multiagent=True``. Each agent
        reported by the environment (``env.number_agents``) is counted as one
        vectorized sub-environment.

        Args:
            env_id: Environment identifier forwarded to
                ``UnityVecEnv.GetFilePath``.
            n_agents: Agent count forwarded to ``UnityVecEnv.GetFilePath``
                as the ``n_agents`` keyword.
        """
        env_path = UnityVecEnv.GetFilePath(env_id, n_agents=n_agents)
        print("**** ", env_path)
        env = UnityEnv(env_path, multiagent=True)
        self.env = env

        # The wrapped env has no `num_envs` of its own; mirror its agent
        # count there so it can be handed to VecEnv below.
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space,
                        env.action_space)

        # One info dict per sub-environment. Observation / reward / done
        # buffers are intentionally not pre-allocated here.
        self.buf_infos = [{} for _ in range(self.num_envs)]

        # Fake Monitor: episode bookkeeping kept directly on this object.
        self.reset_keywords = ()
        self.info_keywords = ()
        # A single timestamp is shared by `tstart` and the writer header so
        # the two can never disagree.
        self.tstart = time.time()
        self.results_writer = ResultsWriter(
            "filename",
            header={
                "t_start": self.tstart,
                # `env.spec` may be falsy; in that case it is stored as-is.
                'env_id': env.spec and env.spec.id,
            },
            extra_keys=self.reset_keywords + self.info_keywords,
        )
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        # Extra info about the current episode, that was passed in during
        # reset().
        self.current_reset_info = {}
예제 #2
0
    def __init__(self, env_id):
        """Start a multi-agent Unity environment and expose it as a VecEnv.

        The build path is derived from the host platform: on Windows it
        points at ``Unity Environment.exe`` inside the ``<env_id>-x16``
        folder under ``envs``; on macOS and Linux it is the ``<env_id>-x16``
        entry itself. Each agent reported by the environment
        (``env.number_agents``) is counted as one vectorized sub-environment.

        Args:
            env_id: Requested environment identifier. It is printed but then
                overridden with ``"hopper"`` (see the note in the body).

        Raises:
            RuntimeError: If ``platform.system()`` reports a platform other
                than Windows, Darwin or Linux, for which no build path is
                defined.
        """
        system = platform.system()
        print ("**** ", env_id, system)

        # NOTE(review): the caller-supplied `env_id` is deliberately replaced
        # by a hard-coded build name; kept as-is because callers may rely on
        # it. The original also listed "walker" as an alternative value.
        env_id = "hopper"

        build_name = env_id + '-x16'
        if system == 'Windows':
            env_path = os.path.join('envs', build_name, 'Unity Environment.exe')
        elif system in ('Darwin', 'Linux'):
            env_path = os.path.join('envs', build_name)
        else:
            # Previously `env_path` was left unbound here and the next
            # statement failed with an opaque UnboundLocalError.
            raise RuntimeError(
                "Unsupported platform for Unity environment: {!r}".format(system)
            )
        print ("**** Override", env_path, env_id)

        env = UnityEnv(env_path, multiagent=True)
        self.env = env
        # The wrapped env has no `num_envs` of its own; mirror its agent
        # count there so it can be handed to VecEnv below.
        env.num_envs = env.number_agents
        VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)

        # One info dict per sub-environment. Observation / reward / done
        # buffers are intentionally not pre-allocated here.
        self.buf_infos = [{} for _ in range(self.num_envs)]

        # Fake Monitor: episode bookkeeping kept directly on this object.
        self.reset_keywords = ()
        self.info_keywords = ()
        # A single timestamp is shared by `tstart` and the writer header so
        # the two can never disagree.
        self.tstart = time.time()
        self.results_writer = ResultsWriter(
            "filename",
            header={"t_start": self.tstart, 'env_id' : env.spec and env.spec.id},
            extra_keys=self.reset_keywords + self.info_keywords
        )
        self.allow_early_resets = True
        self.rewards = None
        self.needs_reset = True
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        # Extra info about the current episode, that was passed in during
        # reset().
        self.current_reset_info = {}