def __init__(self, venv, directory, record_video_trigger, video_length=200):
    """
    # Arguments
        venv: VecEnv to wrap
        directory: Where to save videos
        record_video_trigger: Function that defines when to start recording.
            The function takes the current number of step, and returns whether we
            should start recording or not.
        video_length: Length of recorded video
    """
    VecEnvWrapper.__init__(self, venv)
    self.record_video_trigger = record_video_trigger
    # Lazily created when recording actually starts.
    self.video_recorder = None
    self.directory = os.path.abspath(directory)
    # BUG FIX: os.makedirs(..., exist_ok=True) also creates missing parent
    # directories and avoids the check-then-create race of the original
    # `if not exists: os.mkdir` pair.
    os.makedirs(self.directory, exist_ok=True)
    self.file_prefix = "vecenv"
    # PID in the filename keeps videos from parallel runs from colliding.
    self.file_infix = '{}'.format(os.getpid())
    self.step_id = 0
    self.video_length = video_length
    self.recording = False
    self.recorded_frames = 0
def __init__(self, venv, nstack):
    """Stack the last ``nstack`` observations along the final axis.

    venv:   vectorized environment to wrap.
    nstack: number of consecutive observations to stack.
    """
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space
    # Tile the per-frame bounds nstack times along the channel (last) axis.
    stacked_low = np.repeat(wrapped_space.low, nstack, axis=-1)
    stacked_high = np.repeat(wrapped_space.high, nstack, axis=-1)
    # Rolling buffer holding the current stacked observation for every sub-env.
    self.stackedobs = np.zeros((venv.num_envs,) + stacked_low.shape,
                               stacked_low.dtype)
    observation_space = spaces.Box(low=stacked_low, high=stacked_high,
                                   dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalize observations with running mean/std statistics.

    ob:      track running observation statistics when True.
    ret:     accepted for signature compatibility but has no effect here —
             ret_rms is unconditionally None in this variant.
    clipob:  clip normalized observations to [-clipob, clipob].
    cliprew: clip normalized rewards to [-cliprew, cliprew].
    gamma:   discount used for the per-env return accumulator.
    epsilon: numerical stabilizer added to the variance.
    """
    VecEnvWrapper.__init__(self, venv)
    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None
    # NOTE(review): unlike sibling wrappers, ret_rms is hardcoded to None,
    # so the `ret` flag is ignored — confirm this is intentional.
    self.ret_rms = None
    self.clipob = clipob
    self.cliprew = cliprew
    self.gamma = gamma
    self.epsilon = epsilon
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
def __init__(self, venv, nstack, device=None):
    """Frame-stacking wrapper keeping the stacked buffer as a torch tensor.

    venv:   vectorized environment to wrap.
    nstack: number of frames stacked along the channel (first) axis.
    device: torch device for the stacked buffer; defaults to CPU.
    """
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space
    # Channel count of a single, un-stacked frame.
    self.shape_dim0 = wrapped_space.shape[0]
    # Repeat per-frame bounds nstack times along the channel axis.
    stacked_low = np.repeat(wrapped_space.low, nstack, axis=0)
    stacked_high = np.repeat(wrapped_space.high, nstack, axis=0)
    if device is None:
        device = torch.device('cpu')
    buffer_shape = (venv.num_envs,) + stacked_low.shape
    self.stacked_obs = torch.zeros(buffer_shape).to(device)
    observation_space = gym.spaces.Box(
        low=stacked_low, high=stacked_high,
        dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, visual_obs=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalize the 'visual' dict sub-observation and (optionally) returns.

    visual_obs: track running stats of the 'visual' sub-observation when True.
    ret:        track running stats of discounted returns when True.
    clipob:     clip normalized observations to [-clipob, clipob].
    cliprew:    clip normalized rewards to [-cliprew, cliprew].
    gamma:      discount for the per-env return accumulator.
    epsilon:    numerical stabilizer added to the variance.
    """
    VecEnvWrapper.__init__(self, venv)
    if visual_obs:
        visual_shape = self.observation_space.spaces['visual'].shape
        self.ob_rms = RunningMeanStd(shape=visual_shape)
    else:
        self.ob_rms = None
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    self.gamma = gamma
    self.epsilon = epsilon
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
    # Flag controlling whether running statistics get updated — presumably
    # toggled externally for evaluation; confirm against callers.
    self.training = True
def __init__(self, venv, obs=False, ret=False, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Optionally normalize observations and returns (both off by default).

    obs:     track running observation statistics when True.
    ret:     track running return statistics when True.
    clipob:  clip normalized observations to [-clipob, clipob].
    cliprew: clip normalized rewards to [-cliprew, cliprew].
    gamma:   discount for the per-env return accumulator.
    epsilon: numerical stabilizer added to the variance.
    """
    VecEnvWrapper.__init__(self, venv)
    if obs:
        self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.obs_rms = None
    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None
    self.clipob = clipob
    self.cliprew = cliprew
    self.gamma = gamma
    self.epsilon = epsilon
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
def __init__(self, venv, priors, device=None):
    """Wrapper that widens observations by ``addit_ch`` extra prior channels.

    venv:   vectorized environment to wrap (must expose ``nstack``).
    priors: prior maps to be combined with observations.
    device: torch device for the combined observation buffer; defaults to CPU.
    """
    self.venv = venv
    self.nstack = venv.nstack
    # Number of extra channels appended to each observation.
    self.addit_ch = 3
    self.priors = priors
    wos = venv.observation_space  # wrapped ob space
    w_shape = wos.shape
    low = np.zeros((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]))
    # 0..255 bounds: extra channels presumably hold 8-bit image data — confirm.
    high = np.full((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]), 255)
    # BUG FIX: the original assigned self.device only when `device` was None
    # (leaving it unset for explicit devices) and then moved the buffer with
    # the stale local `device` (possibly None) instead of the resolved one.
    if device is None:
        device = torch.device('cpu')
    self.device = device
    self.full_obs = torch.zeros((venv.num_envs, ) + low.shape).to(self.device)
    observation_space = gym.spaces.Box(low=low, high=high,
                                       dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, nstack, device=None):
    """Stack the last ``nstack`` frames along the channel axis in a torch buffer.

    venv:   wrapped vectorized env whose observation space gives per-frame
            bounds (e.g. 1 x 84 x 84 becomes nstack x 84 x 84).
    nstack: number of frames to stack along axis 0.
    device: torch device for the stacked buffer; defaults to CPU.
    """
    self.venv = venv
    self.nstack = nstack
    frame_space = venv.observation_space  # should be 1 x 84 x 84
    # Channel count of one un-stacked frame (1 in the example above).
    self.shape_dim0 = frame_space.shape[0]
    # Repeat the per-frame low/high bounds nstack times along the channel axis,
    # turning 1 x 84 x 84 bound matrices into nstack x 84 x 84 ones.
    low = np.repeat(frame_space.low, nstack, axis=0)
    high = np.repeat(frame_space.high, nstack, axis=0)
    device = torch.device('cpu') if device is None else device
    # One stacked observation per sub-environment:
    # num_processes x nstack x 84 x 84 in the example.
    self.stacked_obs = torch.zeros((venv.num_envs, ) + low.shape).to(device)
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalize observations and returns with running statistics.

    ob:      track running observation statistics when True.
    ret:     track running return statistics when True.
    clipob:  clip normalized observations to [-clipob, clipob].
    cliprew: clip normalized rewards to [-cliprew, cliprew].
    gamma:   discount for the per-env return accumulator.
    epsilon: numerical stabilizer added to the variance.
    """
    VecEnvWrapper.__init__(self, venv)
    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None
    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None
    self.clipob = clipob
    self.cliprew = cliprew
    self.gamma = gamma
    self.epsilon = epsilon
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
    # Attribute names to persist — presumably consumed by a save/restore
    # helper elsewhere in the class; confirm.
    self.variables_name_save = [
        'clipob', 'cliprew', 'ret', 'gamma', 'epsilon'
    ]
def __init__(self, venv, nstack):
    """Frame-stacking wrapper that also supports Dict observation spaces.

    venv:   vectorized environment; when its observation space is a gym Dict,
            the 'observation' entry is the one that gets stacked.
    nstack: number of frames stacked along the last axis.
    """
    self.venv = venv
    self.nstack = nstack
    self.dict_obs = isinstance(venv.observation_space, gym.spaces.Dict)
    if self.dict_obs:
        wos = venv.observation_space.spaces['observation']
    else:
        wos = venv.observation_space  # wrapped ob space
    low = np.repeat(wos.low, self.nstack, axis=-1)
    high = np.repeat(wos.high, self.nstack, axis=-1)
    # Rolling buffer of the stacked observation for every sub-env.
    self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
    # BUG FIX: take the dtype from the stacked sub-space; the top-level space
    # may be a Dict, which has no meaningful dtype of its own.
    stacked_box = spaces.Box(low=low, high=high, dtype=wos.dtype)
    if self.dict_obs:
        # BUG FIX: build a fresh Dict instead of mutating the wrapped venv's
        # own observation_space in place.
        new_subspaces = dict(venv.observation_space.spaces)
        new_subspaces['observation'] = stacked_box
        observation_space = spaces.Dict(new_subspaces)
    else:
        observation_space = stacked_box
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalize observations (the 'vector' entry for Dict spaces) and returns.

    ob:      track running observation statistics when True.
    ret:     track running return statistics when True.
    clipob:  clip normalized observations to [-clipob, clipob].
    cliprew: clip normalized rewards to [-cliprew, cliprew].
    gamma:   discount for the per-env return accumulator.
    epsilon: numerical stabilizer added to the variance.
    """
    VecEnvWrapper.__init__(self, venv)
    if ob:
        space = self.observation_space
        # BUG FIX: a gym spaces.Dict is not a Python dict, so the original
        # `isinstance(..., dict)` check could never select the 'vector'
        # branch.  Keep the plain-dict case for backward compatibility.
        if isinstance(space, gym.spaces.Dict):
            obs_shape = space.spaces["vector"].shape
        elif isinstance(space, dict):
            obs_shape = space["vector"].shape
        else:
            obs_shape = space.shape
        self.ob_rms = RunningMeanStd(shape=obs_shape)
    else:
        self.ob_rms = None
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, num_models, model_dir, include_action, num_layers, embedding_dims, ctrl_coeff=0., alive_bonus=0.):
    """Load an ensemble of learned reward models from TF checkpoints.

    venv:           VecEnv to wrap.
    num_models:     number of reward models in the ensemble.
    model_dir:      directory containing model_<i>.ckpt checkpoints.
    include_action: forwarded to Model — whether the reward takes actions too.
    num_layers:     forwarded to Model — network depth.
    embedding_dims: forwarded to Model — embedding width.
    ctrl_coeff:     coefficient stored for later use (not used in __init__).
    alive_bonus:    bonus stored for later use (not used in __init__).
    """
    VecEnvWrapper.__init__(self, venv)
    self.ctrl_coeff = ctrl_coeff
    self.alive_bonus = alive_bonus
    # Dedicated graph + CPU-only session so the reward models stay isolated
    # from any other TF graph the training process builds.
    self.graph = tf.Graph()
    config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
    #config.gpu_options.allow_growth = True
    self.sess = tf.Session(graph=self.graph, config=config)
    with self.graph.as_default():
        with self.sess.as_default():
            # Deferred import: preference_learning lives four directories up
            # from this file, so sys.path is extended at construction time.
            import os, sys
            dir_path = os.path.dirname(os.path.realpath(__file__))
            sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
            from preference_learning import Model
            print(os.path.realpath(model_dir))
            self.models = []
            for i in range(num_models):
                # Separate variable scope per ensemble member so checkpoint
                # variable names don't collide.
                with tf.variable_scope('model_%d' % i):
                    model = Model(include_action,
                                  self.venv.observation_space.shape[0],
                                  self.venv.action_space.shape[0],
                                  num_layers=num_layers,
                                  embedding_dims=embedding_dims)
                # Restore the i-th member's weights from its checkpoint.
                model.saver.restore(self.sess, model_dir + '/model_%d.ckpt' % (i))
                self.models.append(model)
def __init__(
    self,
    venv: Env,
    ob: bool = True,
    ret: bool = True,
    clipob: float = 10.0,
    cliprew: float = 10.0,
    gamma: float = 0.99,
    epsilon: float = 1e-8,
    first_n: int = None,
) -> None:
    """
    Modified init function of VecNormalize.  The only change here is in
    modifying the shape of self.ob_rms.  The argument ``first_n`` controls
    how much of the observation we want to normalize: for an observation
    ``obs``, we normalize the vector ``obs[:first_n]``.
    """
    VecEnvWrapper.__init__(self, venv)
    # BUG FIX: the original tested ``ob is not None``, which is True even for
    # ob=False and therefore always created the observation statistics.
    if ob:
        if first_n is None:
            self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
        elif len(self.observation_space.shape) == 1:
            # Partial normalization only makes sense for flat vectors.
            self.ob_rms = RunningMeanStd(shape=(first_n, ))
        else:
            raise NotImplementedError
    else:
        self.ob_rms = None
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    # Per-env discounted return accumulator.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
    self.first_n = first_n
def __init__(self, venv):
    """Thin pass-through wrapper around ``venv``; adds no state of its own."""
    # NOTE(review): this explicit assignment looks redundant with
    # VecEnvWrapper.__init__ — confirm against the base class.
    self.venv = venv
    VecEnvWrapper.__init__(self, venv)
def __init__(self, venv):
    """Wrapper that tracks per-episode returns and lengths.

    The buffers start as None — presumably initialized elsewhere (e.g. on
    reset); None marks "not yet set".
    """
    VecEnvWrapper.__init__(self, venv)
    self.episodic_rets = None
    self.episodic_lens = None
def close(self):
    """Close the wrapped vectorized env, then shut down the video recorder."""
    VecEnvWrapper.close(self)
    # Recorder is closed last so any frames from the env shutdown are flushed.
    self.close_video_recorder()
def __init__(self, env, *, reward_network):
    """Wrap ``env`` so rewards can be recomputed by ``reward_network``.

    reward_network: the reward model (keyword-only, keeping call sites explicit).
    """
    VecEnvWrapper.__init__(self, env)
    self.reward_network = reward_network
    # No previous observation exists until the first reset/step.
    self.prev_obs = None
def __init__(self, venv, dim=6):
    """Wrapper recording the size of the wrapped discrete action space.

    dim: accepted for signature compatibility, but ignored — self.dim is
         always taken from ``action_space.n``.
    """
    VecEnvWrapper.__init__(self, venv)
    # NOTE(review): the `dim` argument is never used; confirm whether it
    # should override action_space.n or be removed at call sites.
    self.dim = self.action_space.n
def __init__(self, venv, r_model):
    """Wrap ``venv`` with a callable reward model.

    r_model: callable used to (re)compute rewards; asserted callable up front.
    """
    VecEnvWrapper.__init__(self, venv)
    # Fail fast on a non-callable model before it is ever invoked.
    assert callable(r_model)
    self.r_model = r_model
def __init__(self, venv):
    """Delegate straight to VecEnvWrapper; this wrapper adds no extra state."""
    VecEnvWrapper.__init__(self, venv)