class env_cover():
    """Adapter that wraps one or several gym environments behind a
    torch-tensor interface.

    Observations, rewards and done flags come back as tensors already
    moved to ``dev`` and reshaped to the batched shapes declared in
    ``config`` (leading dimension = number of parallel environments).
    """

    def __init__(self, config, dev):
        """Build a single env or a SubprocVecEnv of ``num_envs`` workers.

        Args:
            config: dict with keys 'num_envs', 'get_img_from_render',
                'obs_space', 'reward_space', 'gamma_space', 'game_name'.
            dev: torch device the returned tensors are placed on.
        """
        self.dev = dev
        self.num_env = config['num_envs']
        self.get_img_from_render = config['get_img_from_render']
        # Batched shapes: replace the declared leading dim with num_env.
        self.obs_shape = (self.num_env, ) + config['obs_space'][1:]
        self.reward_shape = (self.num_env, ) + config['reward_space'][1:]
        self.gamma_shape = (self.num_env, ) + config['gamma_space'][1:]
        if self.num_env == 1:
            self.env = gym.make(config['game_name'])
        else:
            # SubprocVecEnv expects a list of zero-argument factories.
            def make_env():
                def _thunk():
                    return gym.make(config['game_name'])
                return _thunk

            self.env = SubprocVecEnv(
                [make_env() for _ in range(self.num_env)])

    def reset(self):
        """Reset the env.

        Returns:
            (obs, reward, done) tensors on ``self.dev``; reward and done
            are all-zero placeholders for the initial state.
        """
        st = self.env.reset()
        if self.get_img_from_render:
            # Use the rendered RGB frame as the observation instead of
            # the env's native state vector; scale pixels to [0, 1].
            st = self.env.render(mode='rgb_array')
            st = np.resize(st, self.obs_shape) / 255.
        return (torch.FloatTensor(st).reshape(self.obs_shape).to(self.dev),
                torch.zeros(self.reward_shape).to(self.dev),
                torch.zeros(self.gamma_shape).to(self.dev))

    def step(self, action):
        """Step the env and convert the transition to device tensors.

        Returns:
            (obs, reward, done) tensors shaped to the batched shapes.
        """
        st, rt, dt, _ = self.env.step(action)
        if self.get_img_from_render:
            st = self.env.render(mode='rgb_array')
            st = np.resize(st, self.obs_shape) / 255.
        st = torch.FloatTensor(st).reshape(self.obs_shape).to(self.dev)
        rt = torch.FloatTensor([rt]).reshape(self.reward_shape).to(self.dev)
        if self.num_env == 1:
            dt = torch.FloatTensor([dt]).reshape(self.gamma_shape).to(self.dev)
        else:
            # Vectorized envs return a bool array; cast to 0/1 floats.
            dt = torch.FloatTensor(dt.astype(int)).reshape(
                self.gamma_shape).to(self.dev)
        return st, rt, dt

    def end_dummy(self):
        """Return all-zero (obs, reward, done) placeholder tensors."""
        return (torch.zeros(self.obs_shape).to(self.dev),
                torch.zeros(self.reward_shape).to(self.dev),
                torch.zeros(self.gamma_shape).to(self.dev))

    def render(self):
        """Render the underlying environment."""
        self.env.render()

    def close(self):
        """Close the underlying environment and release its resources."""
        self.env.close()
# NOTE(review): this driver references names not defined in this chunk
# (make_env at module scope, N_ENVS, SubprocVecEnv, VecNormalize, ProgBar,
# randint) — presumably imported/defined elsewhere in the file; confirm.
# In particular `make_env` is passed *uncalled* below, so it must itself
# be a zero-argument factory, unlike the nested make_env() factory-maker
# inside env_cover.__init__ — verify which one is intended.
if __name__ == '__main__':
    # One env factory per worker, fanned out to subprocesses.
    envs = [make_env for i in range(N_ENVS)]
    envs = SubprocVecEnv(envs)
    obs = envs.reset()
    print("OBSERVATION ", obs[0])
    obs = obs.reshape(-1)
    obs_shape = obs.shape
    # Wrap with reward normalization only (ob=False disables obs
    # normalization); gamma is the discount used for the running return.
    envs = VecNormalize(envs, obs_shape, ob=False, gamma=0.99)
    n_steps = 100
    bar = ProgBar(n_steps, bar_char='█')
    for i_episode in range(2):
        ## reinitialize the environment
        observation = envs.reset()
        ## run the simulation for n_steps timesteps
        for t in range(n_steps):
            ## each action is [value, is_rate, is_producer, is_open];
            ## presumably 8 injector wells and 4 producer wells — confirm
            actions_inje = [[randint(410, 430), False, False, True] for _ in range(8)]
            actions_prod = [[randint(220, 250), False, True, True] for _ in range(4)]
            ## advance the simulation forward (same action set per env)
            observation, reward, done, observation_full = \
                envs.step([(actions_inje + actions_prod) for _ in range(N_ENVS)])
            # print (reward)
            bar.update()
            # done is a per-env array; stop the episode as soon as any
            # worker reports termination.
            if done.any():
                print("Episode finished after {} timesteps".format(t + 1))
                break
    envs.close()