Exemple #1
0
    def __init__(self, venv, nstack, device=None):
        """Build per-key stacking buffers and the stacked observation space.

        Args:
            venv: Environment being wrapped. Its ``observation_space`` and
                ``num_envs`` attributes are read here.
            nstack: How many times each sub-space's bounds are repeated
                along axis 0.
            device: Torch device the zero-initialised buffers are moved to.
                Falls back to the CPU device when ``None``.
        """
        self.venv = venv
        self.nstack = nstack

        # The fallback does not depend on the key, so resolve it up front.
        if device is None:
            device = torch.device('cpu')

        # NOTE(review): obs_to_dict is defined elsewhere; it presumably wraps
        # a plain space into a dict-like space keyed by None -- confirm.
        wrapped_space = obs_to_dict(venv.observation_space)

        self.stacked_obs = {}
        self.shape_dim0 = {}
        stacked_spaces = {}
        for key in wrapped_space.spaces:
            sub_space = wrapped_space.spaces[key]

            # Size of axis 0 for a single (unstacked) observation.
            self.shape_dim0[key] = sub_space.shape[0]

            stacked_low = np.repeat(sub_space.low, self.nstack, axis=0)
            stacked_high = np.repeat(sub_space.high, self.nstack, axis=0)

            # One zero-filled buffer per key, with a leading num_envs axis.
            buffer_shape = (venv.num_envs, ) + stacked_low.shape
            self.stacked_obs[key] = torch.zeros(buffer_shape).to(device)

            stacked_spaces[key] = gym.spaces.Box(low=stacked_low,
                                                 high=stacked_high,
                                                 dtype=np.float32)

        # A key set of exactly {None} is exposed as the bare Box rather than
        # as a Dict space.
        if set(stacked_spaces) == {None}:
            observation_space = stacked_spaces[None]
        else:
            observation_space = gym.spaces.Dict(stacked_spaces)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Exemple #2
0
 def __init__(self,
              venv,
              ob=True,
              ret=False,
              clipob=5.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8,
              use_tf=False):
     """Initialise the wrapper and its optional running-statistics trackers.

     Args:
         venv: Environment handed to ``VecEnvWrapper.__init__``.
         ob: When truthy, create ``ob_rms`` shaped like the observation
             space; otherwise ``ob_rms`` is ``None``.
         ret: When truthy, create the scalar-shaped ``ret_rms``; otherwise
             ``ret_rms`` is ``None``.
         clipob: Stored as ``self.clipob``.
         cliprew: Stored as ``self.cliprew``.
         gamma: Stored as ``self.gamma``.
         epsilon: Stored as ``self.epsilon``.
         use_tf: Selects ``TfRunningMeanStd`` (with a named scope) instead
             of ``RunningMeanStd`` as the tracker implementation.
     """
     VecEnvWrapper.__init__(self, venv)

     # Import lazily so only the selected implementation is loaded.
     if use_tf:
         from running_mean_std import TfRunningMeanStd

         def make_rms(shape, scope):
             return TfRunningMeanStd(shape=shape, scope=scope)
     else:
         from running_mean_std import RunningMeanStd

         def make_rms(shape, scope):
             # The non-TF tracker takes no scope argument.
             return RunningMeanStd(shape=shape)

     if ob:
         self.ob_rms = make_rms(self.observation_space.shape, 'ob_rms')
     else:
         self.ob_rms = None

     if ret:
         self.ret_rms = make_rms((), 'ret_rms')
     else:
         self.ret_rms = None

     self.clipob, self.cliprew = clipob, cliprew
     self.gamma, self.epsilon = gamma, epsilon
     # One zero-initialised slot per environment.
     self.ret = np.zeros(self.num_envs)
Exemple #3
0
 def __init__(self,
              venv,
              ob=True,
              ret=True,
              train=True,
              noclip=False,
              has_timestep=False,
              ignore_mask=None,
              freeze_mask=None,
              time_scale=1e-3,
              clipob=10.,
              cliprew=10.,
              gamma=0.99,
              epsilon=1e-8):
     """Initialise the wrapper, its optional trackers and its settings.

     Args:
         venv: Environment handed to ``VecEnvWrapper.__init__``.
         ob: When truthy, create ``ob_rms`` shaped like the observation
             space; otherwise ``ob_rms`` is ``None``.
         ret: When truthy, create the scalar-shaped ``ret_rms``; otherwise
             ``ret_rms`` is ``None``.
         train, noclip, has_timestep, ignore_mask, freeze_mask, time_scale,
         clipob, cliprew, gamma, epsilon: Stored unchanged on the instance
             under the same names; this constructor does not otherwise use
             them.
     """
     VecEnvWrapper.__init__(self, venv)

     # Running-statistics trackers are optional.
     if ob:
         self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
     else:
         self.ob_rms = None
     if ret:
         self.ret_rms = RunningMeanStd(shape=())
     else:
         self.ret_rms = None

     # One zero-initialised slot per environment.
     self.ret = np.zeros(self.num_envs)

     # Plain configuration values.
     self.train = train
     self.noclip = noclip
     self.clipob, self.cliprew = clipob, cliprew
     self.gamma, self.epsilon = gamma, epsilon
     self.ignore_mask, self.freeze_mask = ignore_mask, freeze_mask
     self.has_timestep, self.time_scale = has_timestep, time_scale
Exemple #4
0
 def __init__(self, venv, nstack):
     """Create the stacking buffer and the widened observation space.

     Args:
         venv: Environment being wrapped. Its ``observation_space`` (read
             for ``low``, ``high`` and ``dtype``) and ``num_envs`` are used.
         nstack: How many times the bounds are repeated along the last
             axis.
     """
     self.venv = venv
     self.nstack = nstack

     base_space = venv.observation_space
     stacked_low, stacked_high = (
         np.repeat(bound, self.nstack, axis=-1)
         for bound in (base_space.low, base_space.high)
     )

     # Zero-filled buffer with a leading num_envs axis; it takes its dtype
     # from the repeated lower bound.
     self.stackedobs = np.zeros((venv.num_envs,) + stacked_low.shape,
                                dtype=stacked_low.dtype)

     VecEnvWrapper.__init__(
         self,
         venv,
         observation_space=spaces.Box(low=stacked_low,
                                      high=stacked_high,
                                      dtype=base_space.dtype))
Exemple #5
0
 def __init__(self, venv, nstack, device):
     """Create the torch stacking buffer and the stacked observation space.

     Args:
         venv: Environment being wrapped. Its ``observation_space`` (read
             for ``low``, ``high`` and ``dtype``) and ``num_envs`` are used.
         nstack: How many times the bounds are repeated along axis 0.
         device: Torch device on which the buffer is allocated.
     """
     self.venv = venv
     self.nstack = nstack
     wos = venv.observation_space  # wrapped ob space
     # Size of axis 0 for a single (unstacked) observation.
     self.shape_dim0 = wos.low.shape[0]
     low = np.repeat(wos.low, self.nstack, axis=0)
     high = np.repeat(wos.high, self.nstack, axis=0)
     # Allocate the float32 buffer directly on the target device instead of
     # building a float64 NumPy array and converting/copying it twice.
     self.stackedobs = torch.zeros((venv.num_envs, ) + low.shape,
                                   dtype=torch.float32,
                                   device=device)
     observation_space = gym.spaces.Box(low=low,
                                        high=high,
                                        dtype=wos.dtype)
     VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
Exemple #6
0
    def __init__(self, venv, nstack, device=None):
        """Create the torch stacking buffer and the stacked observation space.

        Args:
            venv: Environment being wrapped. Its ``observation_space`` (read
                for ``shape``, ``low``, ``high`` and ``dtype``) and
                ``num_envs`` are used.
            nstack: How many times the bounds are repeated along axis 0.
            device: Torch device the zero-initialised buffer is moved to.
                Falls back to the CPU device when ``None``.
        """
        self.venv = venv
        self.nstack = nstack

        base_space = venv.observation_space
        # Size of axis 0 for a single (unstacked) observation.
        self.shape_dim0 = base_space.shape[0]

        stacked_low, stacked_high = (
            np.repeat(bound, self.nstack, axis=0)
            for bound in (base_space.low, base_space.high)
        )

        target_device = torch.device('cpu') if device is None else device
        buffer_shape = (venv.num_envs, ) + stacked_low.shape
        self.stacked_obs = torch.zeros(buffer_shape).to(target_device)

        VecEnvWrapper.__init__(
            self,
            venv,
            observation_space=gym.spaces.Box(low=stacked_low,
                                             high=stacked_high,
                                             dtype=base_space.dtype))