Example #1
File: actor.py  Project: wotmd5731/J_LAB
import gym
import numpy as np
import torch
# The SubprocVecEnv import path is an assumption; the project appears to use
# an OpenAI baselines-style vectorized environment wrapper.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv


class env_cover():
    def __init__(self, config, dev):

        self.dev = dev
        self.num_env = config['num_envs']
        self.get_img_from_render = config['get_img_from_render']

        # Per-environment tensor shapes: (num_envs,) + feature dims
        self.obs_shape = (self.num_env, ) + config['obs_space'][1:]
        self.reward_shape = (self.num_env, ) + config['reward_space'][1:]
        self.gamma_shape = (self.num_env, ) + config['gamma_space'][1:]

        if self.num_env == 1:
            self.env = gym.make(config['game_name'])
        else:
            # SubprocVecEnv expects a list of thunks, each of which builds
            # one environment inside its own worker process.
            def make_env():
                def _thunk():
                    return gym.make(config['game_name'])

                return _thunk

            envs = [make_env() for _ in range(self.num_env)]
            self.env = SubprocVecEnv(envs)

    def reset(self):
        st = self.env.reset()
        if self.get_img_from_render:
            # Use the rendered RGB frame as the observation instead of the
            # environment's native state, scaled to [0, 1].
            st = self.env.render(mode='rgb_array')
            st = np.resize(st, self.obs_shape) / 255.

        # The initial reward and gamma (done-mask) tensors start at zero.
        return (torch.FloatTensor(st).reshape(self.obs_shape).to(self.dev),
                torch.zeros(self.reward_shape).to(self.dev),
                torch.zeros(self.gamma_shape).to(self.dev))

    def step(self, action):
        st, rt, dt, _ = self.env.step(action)

        if self.get_img_from_render:
            st = self.env.render(mode='rgb_array')
            st = np.resize(st, self.obs_shape) / 255.

        st = torch.FloatTensor(st).reshape(self.obs_shape).to(self.dev)
        rt = torch.FloatTensor([rt]).reshape(self.reward_shape).to(self.dev)
        if self.num_env == 1:
            dt = torch.FloatTensor([dt]).reshape(self.gamma_shape).to(self.dev)
        else:
            # The vectorized env returns a boolean done array; cast it to
            # int before building the float tensor.
            dt = torch.FloatTensor(dt.astype(int)).reshape(
                self.gamma_shape).to(self.dev)

        return st, rt, dt

    def end_dummy(self):
        # Zero-filled placeholder tensors matching the obs/reward/gamma shapes.
        return (torch.zeros(self.obs_shape).to(self.dev),
                torch.zeros(self.reward_shape).to(self.dev),
                torch.zeros(self.gamma_shape).to(self.dev))

    def render(self):
        self.env.render()

    def close(self):
        self.env.close()
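
A minimal usage sketch for env_cover follows. The config keys mirror the ones read in __init__, but the concrete values, the CartPole-v1 environment id, and the random-action loop are illustrative assumptions, not part of the original project.

# Hypothetical usage sketch for env_cover; all values below are assumptions.
config = {
    'num_envs': 4,
    'game_name': 'CartPole-v1',
    'get_img_from_render': False,
    'obs_space': (1, 4),      # per-env observation shape (CartPole has 4 features)
    'reward_space': (1, 1),
    'gamma_space': (1, 1),
}
dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

env = env_cover(config, dev)
st, rt, gt = env.reset()        # tensors of shape (4, 4), (4, 1), (4, 1) on dev
actions = [env.env.action_space.sample() for _ in range(config['num_envs'])]
st, rt, dt = env.step(actions)  # one random action per sub-environment
env.close()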
Example #2
from random import randint

from pyprind import ProgBar

# make_env, N_ENVS, SubprocVecEnv, and VecNormalize are supplied by the
# surrounding project; only the standard-library and pyprind imports are
# shown here.

if __name__ == '__main__':
    envs = [make_env for _ in range(N_ENVS)]
    envs = SubprocVecEnv(envs)
    obs = envs.reset()
    print("OBSERVATION ", obs[0])
    obs = obs.reshape(-1)
    obs_shape = obs.shape
    # Normalize returns but not observations (ob=False); the signature
    # follows the project-local VecNormalize wrapper.
    envs = VecNormalize(envs, obs_shape, ob=False, gamma=0.99)

    n_steps = 100
    for i_episode in range(2):
        ## a fresh progress bar per episode (the loop below calls
        ## bar.update() n_steps times per episode)
        bar = ProgBar(n_steps, bar_char='█')
        ## reinitialize the environment
        observation = envs.reset()
        ## run the simulation for n_steps timesteps
        for t in range(n_steps):
            ##  value, is_rate, is_producer, is_open
            actions_inje = [[randint(410, 430), False, False, True]
                            for _ in range(8)]
            actions_prod = [[randint(220, 250), False, True, True]
                            for _ in range(4)]
            ## Advance the simulation forward
            observation, reward, done, observation_full = \
                envs.step([(actions_inje + actions_prod) for _ in range(N_ENVS)])
            # print (reward)
            bar.update()
            if done.any():
                print("Episode finished after {} timesteps".format(t + 1))
                break
    envs.close()
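
For context, this snippet hands SubprocVecEnv a list of bare make_env callables (the function itself, not make_env() as in Example #1's thunk factory), so each worker process calls make_env once to build its environment. A minimal sketch of such a factory, with a placeholder environment id, might look like:

import gym

def make_env():
    # Called once inside each SubprocVecEnv worker to build that worker's
    # environment. 'CartPole-v1' is a placeholder; the project plugs in
    # its own simulator here.
    return gym.make('CartPole-v1')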