def __init__(self, venv, nstack, device=None):
    """Build per-key stacked-observation buffers and the widened space.

    The wrapped observation space is passed through ``obs_to_dict`` and
    each sub-space has its bounds repeated ``nstack`` times along axis 0.
    A zero-filled torch buffer with a leading ``venv.num_envs`` dimension
    is allocated for every key.

    Args:
        venv: Wrapped vectorized environment; ``observation_space`` and
            ``num_envs`` are read from it.
        nstack: Repeat count applied to each sub-space's bounds (axis 0).
        device: Device the buffers are moved to; CPU when ``None``.
    """
    self.venv = venv
    self.nstack = nstack

    # The fallback device does not depend on the key, so resolve it once.
    target_device = torch.device('cpu') if device is None else device

    dict_space = obs_to_dict(venv.observation_space)

    self.stacked_obs = {}
    self.shape_dim0 = {}
    stacked_spaces = {}
    for key in dict_space.spaces:
        sub_space = dict_space.spaces[key]
        # Remember the un-stacked size of axis 0 for this key.
        self.shape_dim0[key] = sub_space.shape[0]

        stacked_low = np.repeat(sub_space.low, nstack, axis=0)
        stacked_high = np.repeat(sub_space.high, nstack, axis=0)

        buffer_shape = (venv.num_envs, ) + stacked_low.shape
        self.stacked_obs[key] = torch.zeros(buffer_shape).to(target_device)
        stacked_spaces[key] = gym.spaces.Box(
            low=stacked_low, high=stacked_high, dtype=np.float32)

    # A single ``None`` key is exposed as a bare Box instead of a Dict
    # (presumably how obs_to_dict marks a non-dict space -- TODO confirm).
    if set(stacked_spaces) == {None}:
        observation_space = stacked_spaces[None]
    else:
        observation_space = gym.spaces.Dict(stacked_spaces)

    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, ob=True, ret=False, clipob=5., cliprew=10.,
             gamma=0.99, epsilon=1e-8, use_tf=False):
    """Initialise running statistics for observations and/or returns.

    Args:
        venv: Wrapped vectorized environment, forwarded to
            ``VecEnvWrapper.__init__``.
        ob: When truthy, create a running-statistics tracker shaped like
            the observation space; otherwise ``ob_rms`` is ``None``.
        ret: When truthy, create a scalar running-statistics tracker;
            otherwise ``ret_rms`` is ``None``.
        clipob: Stored as ``self.clipob`` (use not visible in this method).
        cliprew: Stored as ``self.cliprew`` (use not visible in this method).
        gamma: Stored as ``self.gamma`` (use not visible in this method).
        epsilon: Stored as ``self.epsilon`` (use not visible in this method).
        use_tf: Select ``TfRunningMeanStd`` instead of ``RunningMeanStd``.
    """
    VecEnvWrapper.__init__(self, venv)

    # Pick the tracker implementation once; only the TF variant takes a
    # ``scope`` argument, so hide that difference behind a local factory.
    if use_tf:
        from running_mean_std import TfRunningMeanStd

        def make_rms(shape, scope):
            return TfRunningMeanStd(shape=shape, scope=scope)
    else:
        from running_mean_std import RunningMeanStd

        def make_rms(shape, scope):
            return RunningMeanStd(shape=shape)

    if ob:
        self.ob_rms = make_rms(self.observation_space.shape, 'ob_rms')
    else:
        self.ob_rms = None

    if ret:
        self.ret_rms = make_rms((), 'ret_rms')
    else:
        self.ret_rms = None

    self.clipob = clipob
    self.cliprew = cliprew
    # One accumulator slot per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, ob=True, ret=True, train=True, noclip=False,
             has_timestep=False, ignore_mask=None, freeze_mask=None,
             time_scale=1e-3, clipob=10., cliprew=10., gamma=0.99,
             epsilon=1e-8):
    """Initialise running statistics and store normalisation options.

    Args:
        venv: Wrapped vectorized environment, forwarded to
            ``VecEnvWrapper.__init__``.
        ob: When truthy, create a ``RunningMeanStd`` shaped like the
            observation space; otherwise ``ob_rms`` is ``None``.
        ret: When truthy, create a scalar ``RunningMeanStd``; otherwise
            ``ret_rms`` is ``None``.
        train, noclip, has_timestep, ignore_mask, freeze_mask, time_scale,
        clipob, cliprew, gamma, epsilon: Stored unchanged on the instance
            under the same names; how they are consumed is not visible in
            this method.
    """
    VecEnvWrapper.__init__(self, venv)

    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None

    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None

    self.clipob = clipob
    self.cliprew = cliprew
    # One accumulator slot per sub-environment.
    self.ret = np.zeros(self.num_envs)

    # Plain option storage.
    self.train = train
    self.gamma = gamma
    self.epsilon = epsilon
    self.noclip = noclip
    self.ignore_mask = ignore_mask
    self.freeze_mask = freeze_mask
    self.has_timestep = has_timestep
    self.time_scale = time_scale
def __init__(self, venv, nstack):
    """Allocate a NumPy stacking buffer and widen the observation space.

    The wrapped space's bounds are repeated ``nstack`` times along the
    last axis, and a zero-filled buffer with a leading ``venv.num_envs``
    dimension is created using the dtype of the repeated lower bound.

    Args:
        venv: Wrapped vectorized environment; ``observation_space`` and
            ``num_envs`` are read from it.
        nstack: Repeat count applied to the bounds along the last axis.
    """
    self.venv = venv
    self.nstack = nstack

    wrapped_space = venv.observation_space
    stacked_low = np.repeat(wrapped_space.low, nstack, axis=-1)
    stacked_high = np.repeat(wrapped_space.high, nstack, axis=-1)

    buffer_shape = (venv.num_envs,) + stacked_low.shape
    self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)

    stacked_space = spaces.Box(
        low=stacked_low, high=stacked_high, dtype=wrapped_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def __init__(self, venv, nstack, device):
    """Allocate a torch stacking buffer and widen the observation space.

    The wrapped space's bounds are repeated ``nstack`` times along axis 0.
    A zero-filled float32 tensor with a leading ``venv.num_envs``
    dimension is created and moved to ``device``.

    Args:
        venv: Wrapped vectorized environment; ``observation_space`` and
            ``num_envs`` are read from it.
        nstack: Repeat count applied to the bounds along axis 0.
        device: Target passed to ``Tensor.to`` for the buffer.
    """
    self.venv = venv
    self.nstack = nstack

    wos = venv.observation_space  # wrapped ob space
    # Size of axis 0 before stacking.
    self.shape_dim0 = wos.low.shape[0]

    low = np.repeat(wos.low, self.nstack, axis=0)
    high = np.repeat(wos.high, self.nstack, axis=0)

    # Allocate the float32 buffer directly instead of creating a float64
    # NumPy array and converting it; the resulting tensor is the same.
    self.stackedobs = torch.zeros(
        (venv.num_envs, ) + low.shape, dtype=torch.float32).to(device)

    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, nstack, device=None):
    """Allocate a torch stacking buffer and widen the observation space.

    The wrapped space's bounds are repeated ``nstack`` times along axis 0.
    A zero-filled tensor with a leading ``venv.num_envs`` dimension is
    created and moved to the chosen device.

    Args:
        venv: Wrapped vectorized environment; ``observation_space`` and
            ``num_envs`` are read from it.
        nstack: Repeat count applied to the bounds along axis 0.
        device: Device the buffer is moved to; CPU when ``None``.
    """
    self.venv = venv
    self.nstack = nstack

    wrapped_space = venv.observation_space
    # Size of axis 0 before stacking.
    self.shape_dim0 = wrapped_space.shape[0]

    stacked_low = np.repeat(wrapped_space.low, nstack, axis=0)
    stacked_high = np.repeat(wrapped_space.high, nstack, axis=0)

    target_device = torch.device('cpu') if device is None else device
    buffer_shape = (venv.num_envs, ) + stacked_low.shape
    self.stacked_obs = torch.zeros(buffer_shape).to(target_device)

    stacked_space = gym.spaces.Box(
        low=stacked_low, high=stacked_high, dtype=wrapped_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)