def __init__(self):
    """Create the Box2D world, declare the spaces, and reset the episode.

    The action space is chosen from ``self.continuous``, which is read here
    but not assigned in this method.
    """
    EzPickle.__init__(self)
    self.seed()

    # Rendering handle and simulation objects; bodies start out unset.
    self.viewer = None
    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []
    self.prev_reward = None

    # Useful range is -1 .. +1, but spikes can be higher, so the
    # 8-component observation is declared unbounded.
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=(8,), dtype=np.float32)

    if not self.continuous:
        # Nop, fire left engine, main engine, right engine
        self.action_space = spaces.Discrete(4)
    else:
        # Action is two floats [main engine, left-right engines].
        # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power.
        # Engine can't work with less than 50% power.
        # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right
        # engine, -0.5..0.5 off
        self.action_space = spaces.Box(
            low=-1, high=+1, shape=(2,), dtype=np.float32)

    self.reset()
def __init__(self, initial_wealth=25.0, edge_prior_alpha=7,
             edge_prior_beta=3, max_wealth_alpha=5.0, max_wealth_m=200.0,
             max_rounds_mean=300.0, max_rounds_sd=25.0, reseed=True):
    """Sample one game's parameters from the given priors and initialise it.

    Args:
        initial_wealth: starting wealth (stored as a float).
        edge_prior_alpha, edge_prior_beta: Beta prior parameters for the
            coinflip edge.
        max_wealth_alpha, max_wealth_m: parameters forwarded to
            ``genpareto.rvs`` when drawing the wealth cap.
        max_rounds_mean, max_rounds_sd: Normal parameters for the number of
            rounds.
        reseed: when true (or when no ``np_random`` exists yet) ``seed()``
            is called before drawing.
    """
    # Store the hyper-parameters for passing back into __init__() during
    # resets so the same hyper-parameters govern the next game's
    # parameters, as the user expects.
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initial_wealth = float(initial_wealth)
    self.edge_prior_alpha = edge_prior_alpha
    self.edge_prior_beta = edge_prior_beta
    self.max_wealth_alpha = max_wealth_alpha
    self.max_wealth_m = max_wealth_m
    self.max_rounds_mean = max_rounds_mean
    self.max_rounds_sd = max_rounds_sd

    needs_seed = reseed or not hasattr(self, 'np_random')
    if needs_seed:
        self.seed()

    # Draw this game's set of parameters (edge, wealth cap, round count,
    # in that order so the random stream is consumed identically).
    edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
    pareto_draw = genpareto.rvs(max_wealth_alpha, max_wealth_m,
                                random_state=self.np_random)
    max_wealth = round(pareto_draw)
    rounds_draw = self.np_random.normal(max_rounds_mean, max_rounds_sd)
    max_rounds = int(round(rounds_draw))

    # Sufficient statistic for the Pareto distribution on the wealth cap:
    # alpha doesn't update, but x_m does, and simply is the highest wealth
    # count we've seen to date.
    self.max_ever_wealth = float(self.initial_wealth)
    # For the coinflip edge, it is total wins/losses.
    self.wins = 0
    self.losses = 0
    # For the number of rounds, remember how many rounds we've played.
    self.rounds_elapsed = 0

    # The rest proceeds as before.
    self.action_space = spaces.Discrete(int(max_wealth * 100))

    current_wealth_space = spaces.Box(0, max_wealth, shape=[1], dtype=np.float32)
    rounds_elapsed_space = spaces.Discrete(max_rounds + 1)
    wins_space = spaces.Discrete(max_rounds + 1)
    losses_space = spaces.Discrete(max_rounds + 1)
    max_observed_wealth_space = spaces.Box(0, max_wealth, [1], dtype=np.float32)
    self.observation_space = spaces.Tuple((
        current_wealth_space,
        rounds_elapsed_space,
        wins_space,
        losses_space,
        max_observed_wealth_space,
    ))

    self.reward_range = (0, max_wealth)
    self.edge = edge
    self.wealth = self.initial_wealth
    self.max_rounds = max_rounds
    self.rounds = self.max_rounds
    self.max_wealth = max_wealth
def __init__(self, observation_keys=('state',)):
    """Build a Dict observation space with one Box entry per key.

    Args:
        observation_keys: iterable of key names. A single bare string is
            also accepted and treated as one key.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one observation entry per letter instead of one per key.
    if isinstance(observation_keys, str):
        observation_keys = (observation_keys,)

    self.observation_space = spaces.Dict({
        name: spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32)
        for name in observation_keys
    })
    self.action_space = spaces.Box(
        shape=(1,), low=-1, high=1, dtype=np.float32)
def __init__(
        self,
        game='pong',
        mode=None,
        difficulty=None,
        obs_type='ram',
        frameskip=(2, 5),
        repeat_action_probability=0.,
        full_action_space=False):
    """Create an ALE-backed environment for ``game``.

    Args:
        game: game name, resolved to a path via ``atari_py.get_game_path``.
        mode: stored as ``self.game_mode``.
        difficulty: stored as ``self.game_difficulty``.
        obs_type: ``'ram'`` (128-byte observation) or ``'image'``
            (screen-sized RGB observation).
        frameskip: either a tuple (indicating a random range to choose
            from, with the top value excluded), or an int.
        repeat_action_probability: forwarded to ALE's
            ``repeat_action_probability`` setting.
        full_action_space: use ALE's legal action set instead of the
            minimal action set.

    Raises:
        IOError: if the resolved game path does not exist.
    """
    # Record *every* constructor argument, including full_action_space,
    # so the pickling helper sees the complete configuration
    # (NOTE(review): EzPickle presumably replays these on unpickle).
    utils.EzPickle.__init__(
        self,
        game,
        mode,
        difficulty,
        obs_type,
        frameskip,
        repeat_action_probability,
        full_action_space)
    assert obs_type in ('ram', 'image')

    self.game = game
    self.game_path = atari_py.get_game_path(game)
    self.game_mode = mode
    self.game_difficulty = difficulty

    if not os.path.exists(self.game_path):
        msg = 'You asked for game %s but path %s does not exist'
        raise IOError(msg % (game, self.game_path))
    self._obs_type = obs_type
    self.frameskip = frameskip
    self.ale = atari_py.ALEInterface()
    self.viewer = None

    # Tune (or disable) ALE's action repeat:
    # https://github.com/openai/gym/issues/349
    assert isinstance(repeat_action_probability, (float, int)), \
        "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
    self.ale.setFloat(
        'repeat_action_probability'.encode('utf-8'),
        repeat_action_probability)

    self.seed()

    self._action_set = (self.ale.getLegalActionSet() if full_action_space
                        else self.ale.getMinimalActionSet())
    self.action_space = spaces.Discrete(len(self._action_set))

    (screen_width, screen_height) = self.ale.getScreenDims()
    if self._obs_type == 'ram':
        self.observation_space = spaces.Box(
            low=0, high=255, dtype=np.uint8, shape=(128,))
    elif self._obs_type == 'image':
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(screen_height, screen_width, 3),
            dtype=np.uint8)
    else:
        raise error.Error(
            'Unrecognized observation type: {}'.format(self._obs_type))
def __init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type, initial_qpos={},
        randomize_initial_position=True, randomize_initial_rotation=True,
        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20,
        relative_control=False, ignore_z_target_rotation=False,
        touch_visualisation="on_touch", touch_get_obs="sensordata",
        obs_type='original',
):
    """Initializes a new Hand manipulation environment with touch sensors.

    All arguments other than the touch/observation options are forwarded
    unchanged to ``manipulate.ManipulateEnv.__init__``.

    Args:
        touch_visualisation (string): how touch sensor sites are visualised
            - "on_touch": shows touch sensor sites only when touch values > 0
            - "always": always shows touch sensor sites
            - "off" or else: does not show touch sensor sites
        touch_get_obs (string): touch sensor readings
            - "boolean": returns 1 if touch sensor reading != 0.0 else 0
            - "sensordata": returns original touch sensor readings from
              self.sim.data.sensordata[id]
            - "log": returns log(x+1) touch sensor readings from
              self.sim.data.sensordata[id]
            - "off" or else: does not add touch sensor readings to the
              observation
        obs_type: stored as ``self.obs_type``.
    """
    # These attributes are assigned before the parent constructor runs.
    self.obs_type = obs_type
    self.touch_visualisation = touch_visualisation
    self.touch_get_obs = touch_get_obs
    self._touch_sensor_id_site_id = []
    self._touch_sensor_id = []
    self.touch_color = [1, 0, 0, 0.5]
    self.notouch_color = [0, 0.5, 0, 0.2]

    manipulate.ManipulateEnv.__init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type,
        initial_qpos=initial_qpos,
        randomize_initial_position=randomize_initial_position,
        randomize_initial_rotation=randomize_initial_rotation,
        distance_threshold=distance_threshold,
        rotation_threshold=rotation_threshold,
        n_substeps=n_substeps,
        relative_control=relative_control,
        ignore_z_target_rotation=ignore_z_target_rotation,
    )

    # Get touch sensor site names and their ids.
    sim_model = self.sim.model
    for sensor_name, sensor_id in sim_model._sensor_name2id.items():
        if 'robot0:TS_' not in sensor_name:
            continue
        site_name = sensor_name.replace('robot0:TS_', 'robot0:T_')
        site_id = sim_model._site_name2id[site_name]
        self._touch_sensor_id_site_id.append((sensor_id, site_id))
        self._touch_sensor_id.append(sensor_id)

    # Set touch sensors rgba values: 'off' zeroes the alpha channel of
    # every touch site; every other setting leaves the colours untouched.
    if self.touch_visualisation == 'off':
        for _, touch_site_id in self._touch_sensor_id_site_id:
            self.sim.model.site_rgba[touch_site_id][3] = 0.0

    sample_obs = self._get_obs()
    goal_shape = sample_obs['achieved_goal'].shape
    self.observation_space = spaces.Dict(dict(
        desired_goal=spaces.Box(
            -np.inf, np.inf, shape=goal_shape, dtype='float32'),
        achieved_goal=spaces.Box(
            -np.inf, np.inf, shape=goal_shape, dtype='float32'),
        observation=spaces.Box(
            -np.inf, np.inf, shape=sample_obs['observation'].shape,
            dtype='float32'),
    ))
def __init__(self, max_episode_steps=None):
    """Load the ``pusher_plane_straight.xml`` model and configure the env.

    Args:
        max_episode_steps: stored as ``self._max_episode_steps``.
    """
    self.object_radius = 0.03
    self._max_episode_steps = max_episode_steps
    self._elapsed_steps = 0

    ctrl_limit = np.array([0.1, 0.1])
    self.low = -ctrl_limit[0]
    self.high = ctrl_limit[0]
    # The action space is assigned both before and after the MuJoCo base
    # constructor (NOTE(review): presumably because the base constructor
    # needs one and/or overwrites it -- confirm against MujocoEnv).
    self.action_space = spaces.Box(low=-ctrl_limit, high=ctrl_limit)

    utils.EzPickle.__init__(self)
    assets_dir = os.path.join(os.path.dirname(__file__), "assets")
    fullpath = os.path.join(assets_dir, 'pusher_plane_straight.xml')
    mujoco_env.MujocoEnv.__init__(self, fullpath, 2)

    self.action_space = spaces.Box(low=-ctrl_limit, high=ctrl_limit)
    self.cam_name = 'top_cam'
    self.frame_skip = 5
    self.object_start = 8
def __init__(self):
    """Seed the env, declare image observations and 3 actions, then reset."""
    self.seed()
    self.viewer = None

    # Observations are uint8 images of shape (FIELD_H, FIELD_W, 3).
    frame_shape = (FIELD_H, FIELD_W, 3)
    self.observation_space = spaces.Box(
        low=0, high=255, shape=frame_shape, dtype=np.uint8)
    self.action_space = spaces.Discrete(3)

    self.reset()
def __init__(self):
    """Declare a symmetric 6-dim observation box and 3 discrete actions."""
    self.viewer = None

    # The first four components are bounded by 1; the last two by the
    # class velocity limits. The lower bound mirrors the upper one.
    upper = np.array([1.0] * 4 + [self.MAX_VEL_1, self.MAX_VEL_2])
    lower = -upper
    self.observation_space = spaces.Box(
        low=lower, high=upper, dtype=np.float32)
    self.action_space = spaces.Discrete(3)

    self.state = None
    self.seed()
def __init__(self, env):
    """Wrap ``env`` and expose a flat, unbounded float32 observation space.

    Args:
        env: environment whose ``observation_space`` is measured with
            ``spaces.flatdim`` to size the flattened Box.
    """
    super(FlattenObservation, self).__init__(env)

    inf = float('inf')
    flat_size = spaces.flatdim(env.observation_space)
    self.observation_space = spaces.Box(
        low=-inf, high=inf, shape=(flat_size,), dtype=np.float32)
def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
    """Load a MuJoCo model, set up the simulation, and declare the spaces.

    Args:
        model_path: path to the model; paths starting with ``/`` are used
            as-is, anything else is resolved inside this module's
            ``assets`` directory.
        initial_qpos: forwarded to ``self._env_setup``.
        n_actions: length of the action vector (each entry in [-1, 1]).
        n_substeps: forwarded to ``mujoco_py.MjSim`` as ``nsubsteps``.

    Raises:
        IOError: if the resolved model file does not exist.
    """
    fullpath = (
        model_path if model_path.startswith('/')
        else os.path.join(os.path.dirname(__file__), 'assets', model_path)
    )
    if not os.path.exists(fullpath):
        raise IOError('File {} does not exist'.format(fullpath))

    model = mujoco_py.load_model_from_path(fullpath)
    self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
    self.viewer = None
    self._viewers = {}

    frames_per_second = int(np.round(1.0 / self.dt))
    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': frames_per_second
    }

    self.seed()
    self._env_setup(initial_qpos=initial_qpos)
    # Snapshot the simulator state reached after environment setup.
    self.initial_state = copy.deepcopy(self.sim.get_state())

    self.goal = self._sample_goal()
    sample_obs = self._get_obs()

    def unbounded(shape):
        # Unbounded float32 Box of the requested shape.
        return spaces.Box(-np.inf, np.inf, shape=shape, dtype='float32')

    self.action_space = spaces.Box(
        -1., 1., shape=(n_actions,), dtype='float32')
    # Both goal entries are sized from the sampled achieved goal.
    self.observation_space = spaces.Dict(dict(
        desired_goal=unbounded(sample_obs['achieved_goal'].shape),
        achieved_goal=unbounded(sample_obs['achieved_goal'].shape),
        observation=unbounded(sample_obs['observation'].shape),
    ))
def __init__(self, venv, nstack):
    """Wrap ``venv`` so observations hold ``nstack`` copies along the last axis.

    Args:
        venv: vectorised environment providing ``observation_space`` and
            ``num_envs``.
        nstack: repeat count applied to the wrapped bounds on the last axis.
    """
    self.venv = venv
    self.nstack = nstack

    wrapped_space = venv.observation_space
    # Repeat the wrapped bounds along the last axis to get stacked bounds.
    stacked_low = np.repeat(wrapped_space.low, nstack, axis=-1)
    stacked_high = np.repeat(wrapped_space.high, nstack, axis=-1)

    # One zero-initialised stacked buffer per sub-environment.
    buffer_shape = (venv.num_envs,) + stacked_low.shape
    self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)

    stacked_space = spaces.Box(
        low=stacked_low, high=stacked_high, dtype=wrapped_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def convert_observation_to_space(observation):
    """Derive an unbounded space description from a sample observation.

    Dicts are converted recursively into ``spaces.Dict`` (key order kept);
    numpy arrays become a ``spaces.Box`` of the same shape and dtype with
    infinite bounds.

    Raises:
        NotImplementedError: for any other observation type; the args are
            ``(type(observation), observation)``.
    """
    if isinstance(observation, dict):
        converted = OrderedDict()
        for key, value in observation.items():
            converted[key] = convert_observation_to_space(value)
        return spaces.Dict(converted)

    if isinstance(observation, np.ndarray):
        inf = float('inf')
        lower = np.full(observation.shape, -inf)
        upper = np.full(observation.shape, inf)
        return spaces.Box(lower, upper, dtype=observation.dtype)

    raise NotImplementedError(type(observation), observation)
def __init__(self):
    """Seed the env, declare spaces, decode the digit glyphs, and reset."""
    self.seed()
    self.viewer = None

    self.observation_space = spaces.Box(
        low=0, high=255, shape=(FIELD_H, FIELD_W, 3), dtype=np.uint8)
    self.action_space = spaces.Discrete(10)

    # Convert the module-level ``bogus_mnist`` character rows into a
    # (10, 6, 6) uint8 array holding the ordinal of each character.
    self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
    for digit_idx in range(10):
        glyph_rows = bogus_mnist[digit_idx]
        for row_idx in range(6):
            row_codes = [ord(symbol) for symbol in glyph_rows[row_idx]]
            self.bogus_mnist[digit_idx, row_idx, :] = row_codes

    self.reset()
def __init__(self, model_path, frame_skip):
    """Load a MuJoCo model and derive action/observation spaces from it.

    Args:
        model_path: path to the model; paths starting with ``/`` are used
            as-is, anything else is resolved inside this module's
            ``assets`` directory.
        frame_skip: stored as ``self.frame_skip``.

    Raises:
        IOError: if the resolved model file does not exist.
    """
    fullpath = model_path
    if not model_path.startswith("/"):
        fullpath = os.path.join(
            os.path.dirname(__file__), "assets", model_path)
    if not path.exists(fullpath):
        raise IOError("File %s does not exist" % fullpath)

    self.frame_skip = frame_skip
    self.model = load_model_from_path(fullpath)
    self.sim = MjSim(self.model)
    self.data = self.sim.data
    self._seed()

    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': int(np.round(1.0 / self.dt))
    }
    self.mujoco_render_frames = False

    # Flattened copies of the positions/velocities right after loading.
    self.init_qpos = self.data.qpos.ravel().copy()
    self.init_qvel = self.data.qvel.ravel().copy()

    # Take one zero-action step to discover the observation size.
    zero_action = np.zeros(self.model.nu)
    observation, _reward, done, _info = self._step(zero_action)
    assert not done
    if type(observation) is tuple:
        self.obs_dim = np.sum([part.size for part in observation])
    else:
        self.obs_dim = observation.size

    # Action bounds come from the model's actuator control ranges
    # (column 0 = lower bound, column 1 = upper bound).
    ctrl_range = self.model.actuator_ctrlrange.copy()
    self.action_space = spaces.Box(ctrl_range[:, 0], ctrl_range[:, 1])

    # Observations are unbounded in every dimension.
    obs_high = np.inf * np.ones(self.obs_dim)
    obs_low = -obs_high
    self.observation_space = spaces.Box(obs_low, obs_high)

    self.sim.forward()
def __init__(self, verbose=1):
    """Create the Box2D world with a contact listener and declare the spaces.

    Args:
        verbose: stored as ``self.verbose``.
    """
    EzPickle.__init__(self)
    self.seed()

    # The listener is also stored on the instance (``_keepref``) in
    # addition to being handed to the world.
    self.contactListener_keepref = FrictionDetector(self)
    self.world = Box2D.b2World(
        (0, 0), contactListener=self.contactListener_keepref)

    self.viewer = None
    self.invisible_state_window = None
    self.invisible_video_window = None
    self.road = None
    self.car = None
    self.reward = 0.0
    self.prev_reward = 0.0
    self.verbose = verbose

    # Fixture template built from a unit square.
    unit_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
    self.fd_tile = fixtureDef(shape=polygonShape(vertices=unit_square))

    # steer, gas, brake
    action_low = np.array([-1, 0, 0])
    action_high = np.array([+1, +1, +1])
    self.action_space = spaces.Box(
        action_low, action_high, dtype=np.float32)

    self.observation_space = spaces.Box(
        low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
def __init__(self):
    """Configure the guessing game (action bounds 2000), seed, and reset."""
    self.range = 1000  # +/- value the randomly select number can be between
    self.bounds = 2000  # Action space bounds

    lower_bound = np.array([-self.bounds])
    upper_bound = np.array([self.bounds])
    self.action_space = spaces.Box(
        low=lower_bound, high=upper_bound, dtype=np.float32)
    self.observation_space = spaces.Discrete(4)

    # Per-episode bookkeeping.
    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self):
    """Configure the guessing game (action bounds 10000), seed, and reset."""
    self.range = 1000  # Randomly selected number is within +/- this value
    self.bounds = 10000

    # One-dimensional continuous guess limited to [-bounds, +bounds].
    self.action_space = spaces.Box(
        low=np.array([-self.bounds]),
        high=np.array([self.bounds]),
        dtype=np.float32,
    )
    self.observation_space = spaces.Discrete(4)

    # Per-episode bookkeeping.
    self.number = 0
    self.guess_count = 0
    self.guess_max = 200
    self.observation = 0

    self.seed()
    self.reset()
def __init__(self, env, k):
    """Stack k last frames.

    Returns lazy array, which is much more memory efficient.

    The declared observation space keeps the wrapped shape except for the
    last axis, which is multiplied by ``k``.

    See Also
    --------
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    # Bounded buffer that holds at most ``k`` entries.
    self.frames = deque(maxlen=k)

    wrapped_space = env.observation_space
    frame_shape = wrapped_space.shape
    stacked_shape = frame_shape[:-1] + (frame_shape[-1] * k,)
    self.observation_space = spaces.Box(
        low=0, high=255, shape=stacked_shape, dtype=wrapped_space.dtype)
def __init__(self, initial_wealth=25.0, edge=0.6, max_wealth=250.0,
             max_rounds=300):
    """Configure the coinflip betting game, then seed and reset it.

    Args:
        initial_wealth: starting wealth.
        edge: stored as ``self.edge``.
        max_wealth: upper bound of the wealth observation and reward range.
        max_rounds: the rounds observation is ``Discrete(max_rounds + 1)``.
    """
    # Betting in penny increments.
    penny_actions = int(max_wealth * 100)
    self.action_space = spaces.Discrete(penny_actions)

    wealth_space = spaces.Box(0, max_wealth, [1], dtype=np.float32)  # (w,b)
    rounds_space = spaces.Discrete(max_rounds + 1)
    self.observation_space = spaces.Tuple((wealth_space, rounds_space))
    self.reward_range = (0, max_wealth)

    self.edge = edge
    self.wealth = initial_wealth
    self.initial_wealth = initial_wealth
    self.max_rounds = max_rounds
    self.max_wealth = max_wealth

    # Placeholders assigned before seed() and reset() are called.
    self.np_random = None
    self.rounds = None
    self.seed()
    self.reset()
def __init__(self):
    """Declare a single-component float32 action space bounded by [-1, 1]."""
    unit_action = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32)
    self.action_space = unit_action
def __init__(self, *args, **kwargs):
    """Declare a Dict observation space with one ``'state'`` entry.

    The space is assigned before delegating to the parent constructor;
    all positional and keyword arguments are forwarded unchanged.
    """
    state_space = spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32)
    self.observation_space = spaces.Dict({'state': state_space})
    super(FakeDictObservationEnvironment, self).__init__(*args, **kwargs)
def _set_action_space(self):
    """Build the action space from the model's actuator control ranges.

    Returns:
        The float32 ``spaces.Box`` that was stored as ``self.action_space``.
    """
    ctrl_range = self.model.actuator_ctrlrange.copy()
    # Transposing lets the array unpack into its two columns:
    # lower bounds first, upper bounds second.
    lower, upper = ctrl_range.T
    self.action_space = spaces.Box(low=lower, high=upper, dtype=np.float32)
    return self.action_space
def __init__(self, env, pixels_only=True, render_kwargs=None,
             pixel_keys=('pixels', )):
    """Initializes a new pixel Wrapper.

    Args:
        env: The environment to wrap.
        pixels_only: If `True` (default), the original observation returned
            by the wrapped environment will be discarded, and a dictionary
            observation will only include pixels. If `False`, the
            observation dictionary will contain both the original
            observations and the pixel observations.
        render_kwargs: Optional `dict` mapping each pixel key to a `dict`
            of keyword arguments passed to the `self.render` method for
            that key. The caller's dictionary is not modified.
        pixel_keys: Optional sequence of strings naming the pixel
            observations' keys in the `OrderedDict` of observations.
            Defaults to `('pixels', )`.

    Raises:
        ValueError: If `env`'s observation spec is not compatible with the
            wrapper. Supported formats are a single array, or a dict of
            arrays.
        ValueError: If `env`'s observation already contains any of the
            specified `pixel_keys`.
        TypeError: If a rendered frame has a dtype that is neither integer
            nor floating point.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the ABC
    # lives in `collections.abc`.
    from collections.abc import MutableMapping

    super(PixelObservationWrapper, self).__init__(env)

    # Work on copies so the caller's dictionaries are never mutated.
    render_kwargs = {} if render_kwargs is None else dict(render_kwargs)

    for key in pixel_keys:
        key_kwargs = dict(render_kwargs.get(key, {}))
        render_mode = key_kwargs.pop('mode', 'rgb_array')
        assert render_mode == 'rgb_array', render_mode
        key_kwargs['mode'] = 'rgb_array'
        render_kwargs[key] = key_kwargs

    wrapped_observation_space = env.observation_space

    if isinstance(wrapped_observation_space, spaces.Box):
        self._observation_is_dict = False
        invalid_keys = set([STATE_KEY])
    elif isinstance(wrapped_observation_space,
                    (spaces.Dict, MutableMapping)):
        self._observation_is_dict = True
        invalid_keys = set(wrapped_observation_space.spaces.keys())
    else:
        raise ValueError("Unsupported observation space structure.")

    if not pixels_only:
        # Make sure that no keys in the `pixel_keys` overlap with
        # `observation_keys`
        overlapping_keys = set(pixel_keys) & set(invalid_keys)
        if overlapping_keys:
            raise ValueError("Duplicate or reserved pixel keys {!r}."
                             .format(overlapping_keys))

    if pixels_only:
        self.observation_space = spaces.Dict()
    elif self._observation_is_dict:
        self.observation_space = copy.deepcopy(wrapped_observation_space)
    else:
        self.observation_space = spaces.Dict()
        self.observation_space.spaces[STATE_KEY] = wrapped_observation_space

    # Extend observation space with pixels.
    pixels_spaces = {}
    for pixel_key in pixel_keys:
        # Render with this key's own keyword arguments; unpacking the
        # whole mapping would pass the pixel keys themselves as kwargs.
        pixels = self.env.render(**render_kwargs[pixel_key])

        if np.issubdtype(pixels.dtype, np.integer):
            low, high = (0, 255)
        elif np.issubdtype(pixels.dtype, np.floating):
            # `np.floating` matches every float dtype; the removed
            # `np.float` alias was just the builtin `float`.
            low, high = (-float('inf'), float('inf'))
        else:
            raise TypeError(pixels.dtype)

        pixels_space = spaces.Box(
            shape=pixels.shape, low=low, high=high, dtype=pixels.dtype)
        pixels_spaces[pixel_key] = pixels_space

    self.observation_space.spaces.update(pixels_spaces)

    self._env = env
    self._pixels_only = pixels_only
    self._render_kwargs = render_kwargs
    self._pixel_keys = pixel_keys