def __init__(self, env_id, n_agents):
    """Launch a multi-agent Unity environment and initialise the wrapper state.

    The executable path is resolved with ``UnityVecEnv.GetFilePath`` and a
    ``UnityEnv`` is started in multi-agent mode.  Each agent reported by
    the Unity environment is treated as one sub-environment of this
    vectorised env.

    Args:
        env_id: Environment identifier forwarded to
            ``UnityVecEnv.GetFilePath``.
        n_agents: Agent count forwarded to ``UnityVecEnv.GetFilePath`` as
            the ``n_agents`` keyword.
    """
    env_path = UnityVecEnv.GetFilePath(env_id, n_agents=n_agents)
    print("**** ", env_path)

    env = UnityEnv(env_path, multiagent=True)
    self.env = env
    # Expose the agent count under the name the vectorised-env API uses.
    env.num_envs = env.number_agents
    VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)

    # One info dict per sub-environment.
    # NOTE(review): self.num_envs is presumably set by VecEnv.__init__ above
    # -- confirm against the VecEnv base class.
    self.buf_infos = [{} for _ in range(self.num_envs)]

    # Fake Monitor: episode-statistics bookkeeping kept on the wrapper itself.
    self.reset_keywords = ()
    self.info_keywords = ()
    # Sample the clock once so the header's t_start matches self.tstart.
    self.tstart = time.time()
    self.results_writer = ResultsWriter(
        "filename",
        header={
            "t_start": self.tstart,
            # Falls back to the (falsy) spec itself when the env has no spec.
            'env_id': env.spec and env.spec.id,
        },
        extra_keys=self.reset_keywords + self.info_keywords,
    )
    self.allow_early_resets = True
    self.rewards = None
    self.needs_reset = True
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_times = []
    self.total_steps = 0
    # Extra info about the current episode, that was passed in during reset().
    self.current_reset_info = {}
def __init__(self, env_id):
    """Launch the bundled Unity build and initialise the wrapper state.

    The build path is chosen from ``platform.system()`` and a ``UnityEnv``
    is started in multi-agent mode.  Each agent reported by the Unity
    environment is treated as one sub-environment of this vectorised env.

    Args:
        env_id: Requested environment identifier.  It is printed and then
            overridden by a hard-coded value (see the note in the body).

    Raises:
        RuntimeError: If ``platform.system()`` is not ``'Windows'``,
            ``'Darwin'`` or ``'Linux'``, so no build path can be chosen.
    """
    system_name = platform.system()
    print ("**** ", env_id, system_name)

    # NOTE(review): the caller-supplied env_id is discarded here and the
    # "hopper" build is always used ("walker" was the commented-out
    # alternative).  Kept as-is because it looks like a deliberate
    # override -- confirm whether the argument should be honoured.
    env_id = "hopper"

    build_dir = os.path.join('envs', env_id+'-x16')
    if system_name == 'Windows':
        env_path = os.path.join(build_dir, 'Unity Environment.exe')
    elif system_name in ('Darwin', 'Linux'):  # Darwin is MacOS
        env_path = build_dir
    else:
        # Previously env_path was left unbound here and the next statement
        # failed with an opaque UnboundLocalError.
        raise RuntimeError(
            "Unsupported platform for Unity environment: %r" % (system_name,)
        )
    print ("**** Override", env_path, env_id)

    env = UnityEnv(env_path, multiagent=True)
    self.env = env
    # Expose the agent count under the name the vectorised-env API uses.
    env.num_envs = env.number_agents
    VecEnv.__init__(self, env.num_envs, env.observation_space, env.action_space)

    # One info dict per sub-environment.
    # NOTE(review): self.num_envs is presumably set by VecEnv.__init__ above
    # -- confirm against the VecEnv base class.
    self.buf_infos = [{} for _ in range(self.num_envs)]

    # Fake Monitor: episode-statistics bookkeeping kept on the wrapper itself.
    self.reset_keywords = ()
    self.info_keywords = ()
    # Sample the clock once so the header's t_start matches self.tstart.
    self.tstart = time.time()
    self.results_writer = ResultsWriter(
        "filename",
        header={
            "t_start": self.tstart,
            # Falls back to the (falsy) spec itself when the env has no spec.
            'env_id': env.spec and env.spec.id,
        },
        extra_keys=self.reset_keywords + self.info_keywords,
    )
    self.allow_early_resets = True
    self.rewards = None
    self.needs_reset = True
    self.episode_rewards = []
    self.episode_lengths = []
    self.episode_times = []
    self.total_steps = 0
    # Extra info about the current episode, that was passed in during reset().
    self.current_reset_info = {}