def __init__(self, *args, **kwargs):
    self.random_start = kwargs.get("random_start", True)
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "inverted_double_pendulum.xml"))
    Serializable.quick_init(self, locals())
def __init__(self, env_spec, max_replay_buffer_size):
    Serializable.quick_init(self, locals())
    super(SimpleReplayBuffer, self).__init__(env_spec)

    max_replay_buffer_size = int(max_replay_buffer_size)

    # TODO: add support for changing buffer datatypes and attributes
    self._env_spec = env_spec
    self._observation_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim
    self._max_buffer_size = max_replay_buffer_size
    self._observations = np.zeros(
        (max_replay_buffer_size, self._observation_dim), dtype='float32')
    # It's a bit memory inefficient to save the observations twice,
    # but it makes the code *much* easier since you no longer have to
    # worry about termination conditions.
    # TODO: add logic for termination condition to save memory
    # TODO: at least convert skill to int or use sparse matrix to save mem
    self._next_obs = np.zeros(
        (max_replay_buffer_size, self._observation_dim), dtype='float32')
    self._actions = np.zeros(
        (max_replay_buffer_size, self._action_dim), dtype='float32')
    self._rewards = np.zeros(max_replay_buffer_size, dtype='float32')
    # self._dones[i] == 1 iff a terminal signal was received at time i
    self._dones = np.zeros(max_replay_buffer_size, dtype='uint8')
    self._top = 0   # index of the next slot to write
    self._size = 0  # number of valid samples currently stored
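For context, a minimal sketch of the companion add_sample method such a ring buffer typically pairs with; the method name and argument order are assumptions, not necessarily this repo's exact API. It shows how _top wraps around _max_buffer_size and how _size saturates.

def add_sample(self, observation, action, reward, done, next_observation):
    # Write one transition into the ring buffer (sketch; signature is assumed).
    self._observations[self._top] = observation
    self._actions[self._top] = action
    self._rewards[self._top] = reward
    self._dones[self._top] = done
    self._next_obs[self._top] = next_observation
    # Advance the write pointer, wrapping around once the buffer is full,
    # and grow the sample count until it saturates at the maximum size.
    self._top = (self._top + 1) % self._max_buffer_size
    self._size = min(self._size + 1, self._max_buffer_size)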
def __init__(self, observation_space, action_space):
    """
    :type observation_space: Space
    :type action_space: Space
    """
    Serializable.quick_init(self, locals())
    self._observation_space = observation_space
    self._action_space = action_space
def __init__(
        self,
        alive_coeff=1,
        ctrl_cost_coeff=0.01,
        *args, **kwargs):
    self.alive_coeff = alive_coeff
    self.ctrl_cost_coeff = ctrl_cost_coeff
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "hopper.xml"))
    Serializable.quick_init(self, locals())
def __init__(self, env_name, record_video=False, video_schedule=None,
             log_dir=None, record_log=False, force_reset=True, seed=1):
    # if log_dir is None:
    #     if logger.get_snapshot_dir() is None:
    #         logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
    #     else:
    #         log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    env.seed(seed)

    # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
    # the time limit specified for each environment has been passed and
    # therefore the environment is not Markovian (terminal condition depends
    # on time rather than state).
    env = env.env

    self.env = env
    self.env_id = env.spec.id

    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir,
                                        video_callable=video_schedule,
                                        force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    # logger.log("observation space: {}".format(self._observation_space))
    self._action_space = convert_gym_space(env.action_space)
    # logger.log("action space: {}".format(self._action_space))
    self._horizon = env.spec.tags[
        'wrapper_config.TimeLimit.max_episode_steps']
    self._log_dir = log_dir
    self._force_reset = force_reset
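A minimal usage sketch of this wrapper, assuming an rllab-style GymEnv class built from the constructor above; the environment id is illustrative and must be registered in the local Gym installation.

# Illustrative only: id, keyword defaults, and the step unpacking assume a
# standard rllab/Gym setup; adapt to whatever environments are registered.
env = GymEnv("HalfCheetah-v1", record_video=False, record_log=False, seed=1)
obs = env.reset()
action = env.action_space.sample()
next_obs, reward, done, info = env.step(action)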
def __init__(
        self,
        vel_deviation_cost_coeff=1e-2,
        alive_bonus=0.2,
        ctrl_cost_coeff=1e-3,
        impact_cost_coeff=1e-5,
        *args, **kwargs):
    self.vel_deviation_cost_coeff = vel_deviation_cost_coeff
    self.alive_bonus = alive_bonus
    self.ctrl_cost_coeff = ctrl_cost_coeff
    self.impact_cost_coeff = impact_cost_coeff
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "simple_humanoid.xml"))
    Serializable.quick_init(self, locals())
def __init__(self, env, reward_fn, num_skills, skill):
    Serializable.quick_init(self, locals())
    self._wrapped_env = env
    self.reward_function = reward_fn
    self.num_skills = num_skills
    self.z = skill

    # Augment the observation space with a one-hot encoding of the skill.
    obs_space = self._wrapped_env.observation_space
    low = np.hstack([obs_space.low, np.full(num_skills, 0)])
    high = np.hstack([obs_space.high, np.full(num_skills, 1)])
    self.observation_space = spaces.Box(low=low, high=high)

    # TODO: verify the line below is doing something useful
    self.action_space = self._wrapped_env.action_space
    print("Verifying action space: ", self.action_space)

    self.spec = EnvSpec(self.observation_space, self.action_space)
    self.reset(state=None, skill=self.z)
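A hypothetical construction of this wrapper, assuming the corrected signature above; the wrapper class name, the reward-function signature, and the skill count below are placeholders for illustration only.

# Hypothetical usage; RewardWrapperEnv, constant_reward, and the argument
# values are assumptions, not names taken from this codebase.
def constant_reward(obs, action, next_obs):
    return 0.0  # e.g. when reward comes entirely from the skill discriminator

base_env = HalfCheetahEnv()
env = RewardWrapperEnv(base_env, reward_fn=constant_reward,
                       num_skills=10, skill=3)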
def __init__(
        self,
        env,
        scale_reward=1.,
        normalize_obs=False,
        normalize_reward=False,
        obs_alpha=0.001,
        reward_alpha=0.001,
        normalization_scale=1.,
):
    Serializable.quick_init(self, locals())
    self._scale_reward = scale_reward
    self._wrapped_env = env

    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._obs_alpha = obs_alpha
    self._obs_mean = np.zeros(self.observation_space.shape)
    self._obs_var = np.ones(self.observation_space.shape)
    self._reward_alpha = reward_alpha
    self._reward_mean = 0.
    self._reward_var = 1.
    self._normalization_scale = normalization_scale
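A sketch of how the running statistics initialized above are typically maintained in rllab-style normalized wrappers: an exponential moving average weighted by obs_alpha, then standardization. The helper names and the epsilon are assumptions, not necessarily this repo's exact methods.

def _update_obs_estimate(self, obs):
    # Exponential moving average of per-dimension mean and variance,
    # weighted by obs_alpha (sketch; helper name is assumed).
    self._obs_mean = (1 - self._obs_alpha) * self._obs_mean \
        + self._obs_alpha * obs
    self._obs_var = (1 - self._obs_alpha) * self._obs_var \
        + self._obs_alpha * np.square(obs - self._obs_mean)

def _apply_normalize_obs(self, obs):
    # Standardize using the running statistics; the small epsilon guards
    # against division by zero before the variance estimate warms up.
    self._update_obs_estimate(obs)
    return (obs - self._obs_mean) / (np.sqrt(self._obs_var) + 1e-8)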
def __init__(self, *args, **kwargs):
    MujocoEnv.__init__(
        self,
        os.path.join(os.path.abspath(os.path.dirname(__file__)),
                     "assets", "half_cheetah.xml"))
    Serializable.__init__(self, *args, **kwargs)
def __init__(self, *args, **kwargs):
    super(AntEnv, self).__init__(*args, **kwargs)
    Serializable.__init__(self, *args, **kwargs)
def __setstate__(self, d):
    Serializable.__setstate__(self, d)
    self._obs_mean = d["_obs_mean"]
    self._obs_var = d["_obs_var"]
def __getstate__(self):
    d = Serializable.__getstate__(self)
    d["_obs_mean"] = self._obs_mean
    d["_obs_var"] = self._obs_var
    return d
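These two hooks carry the running observation statistics through serialization, so a restored normalized wrapper rescales observations exactly like the one that was trained. A minimal round-trip sketch, assuming env is an instance of the normalized wrapper above with a pickleable inner environment:

import pickle

# Assumes `env` already accumulated some _obs_mean/_obs_var during rollouts.
blob = pickle.dumps(env)
restored = pickle.loads(blob)
assert np.allclose(restored._obs_mean, env._obs_mean)
assert np.allclose(restored._obs_var, env._obs_var)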