Example #1
    def __init__(self):
        EzPickle.__init__(self)
        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-np.inf,
                                            np.inf,
                                            shape=(8, ),
                                            dtype=np.float32)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
            # Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
            self.action_space = spaces.Box(-1, +1, (2, ), dtype=np.float32)
        else:
            # Nop, fire left engine, main engine, right engine
            self.action_space = spaces.Discrete(4)

        self.reset()
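For orientation, the throttle convention described in the comments above can be decoded roughly like this (a sketch, not the environment's step code; `a` is a hypothetical continuous action):

    import numpy as np

    a = np.array([0.2, -0.7])  # hypothetical continuous action [main, left-right]
    # main engine: off for a[0] <= 0, else 50%..100% power as a[0] goes 0..1
    m_power = (np.clip(a[0], 0.0, 1.0) + 1.0) * 0.5 if a[0] > 0.0 else 0.0
    # side engines: |a[1]| in 0.5..1.0 fires left (negative) or right (positive)
    s_power = np.clip(np.abs(a[1]), 0.5, 1.0) if np.abs(a[1]) > 0.5 else 0.0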
Example #2
    def __init__(self,
                 initial_wealth=25.0,
                 edge_prior_alpha=7,
                 edge_prior_beta=3,
                 max_wealth_alpha=5.0,
                 max_wealth_m=200.0,
                 max_rounds_mean=300.0,
                 max_rounds_sd=25.0,
                 reseed=True):
        # store the hyper-parameters for passing back into __init__() during resets so
        # the same hyper-parameters govern the next game's parameters, as the user
        # expects:
        # TODO: this is boilerplate, is there any more elegant way to do this?
        self.initial_wealth = float(initial_wealth)
        self.edge_prior_alpha = edge_prior_alpha
        self.edge_prior_beta = edge_prior_beta
        self.max_wealth_alpha = max_wealth_alpha
        self.max_wealth_m = max_wealth_m
        self.max_rounds_mean = max_rounds_mean
        self.max_rounds_sd = max_rounds_sd

        if reseed or not hasattr(self, 'np_random'):
            self.seed()

        # draw this game's set of parameters:
        edge = self.np_random.beta(edge_prior_alpha, edge_prior_beta)
        max_wealth = round(
            genpareto.rvs(max_wealth_alpha,
                          max_wealth_m,
                          random_state=self.np_random))
        max_rounds = int(
            round(self.np_random.normal(max_rounds_mean, max_rounds_sd)))

        # track an additional instance variable which is the sufficient statistic for the
        # Pareto distribution on the wealth cap; alpha doesn't update, but x_m does: it
        # is simply the highest wealth we've seen to date:
        self.max_ever_wealth = float(self.initial_wealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.rounds_elapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(max_wealth * 100))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, max_wealth, shape=[1],
                       dtype=np.float32),  # current wealth
            spaces.Discrete(max_rounds + 1),  # rounds elapsed
            spaces.Discrete(max_rounds + 1),  # wins
            spaces.Discrete(max_rounds + 1),  # losses
            spaces.Box(0, max_wealth, [1],
                       dtype=np.float32)))  # maximum observed wealth
        self.reward_range = (0, max_wealth)
        self.edge = edge
        self.wealth = self.initial_wealth
        self.max_rounds = max_rounds
        self.rounds = self.max_rounds
        self.max_wealth = max_wealth
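For reference, a `Tuple` space like the one above samples to a plain Python tuple; a minimal standalone sketch with illustrative bounds:

    import numpy as np
    from gym import spaces

    obs_space = spaces.Tuple((
        spaces.Box(0, 250.0, shape=[1], dtype=np.float32),  # wealth
        spaces.Discrete(301),                               # rounds elapsed
    ))
    wealth, rounds = obs_space.sample()  # (ndarray of shape (1,), int)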
Example #3
    def __init__(self, observation_keys=('state', )):
        # ('state',) is a one-element tuple; without the trailing comma it is
        # a plain string, and iterating over it would yield one sub-space per character
        self.observation_space = spaces.Dict({
            name: spaces.Box(shape=(2, ), low=-1, high=1, dtype=np.float32)
            for name in observation_keys
        })
        self.action_space = spaces.Box(shape=(1, ),
                                       low=-1,
                                       high=1,
                                       dtype=np.float32)
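A `Dict` space built this way samples to an `OrderedDict` keyed by the same names; a minimal sketch:

    import numpy as np
    from gym import spaces

    obs_space = spaces.Dict({
        'state': spaces.Box(shape=(2, ), low=-1, high=1, dtype=np.float32),
    })
    sample = obs_space.sample()        # OrderedDict([('state', array([...]))])
    assert obs_space.contains(sample)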
Example #4
    def __init__(
            self,
            game='pong',
            mode=None,
            difficulty=None,
            obs_type='ram',
            frameskip=(2, 5),
            repeat_action_probability=0.,
            full_action_space=False):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value exclude), or an int."""

        utils.EzPickle.__init__(
                self,
                game,
                mode,
                difficulty,
                obs_type,
                frameskip,
                repeat_action_probability)
        assert obs_type in ('ram', 'image')

        self.game = game
        self.game_path = atari_py.get_game_path(game)
        self.game_mode = mode
        self.game_difficulty = difficulty

        if not os.path.exists(self.game_path):
            msg = 'You asked for game %s but path %s does not exist'
            raise IOError(msg % (game, self.game_path))
        self._obs_type = obs_type
        self.frameskip = frameskip
        self.ale = atari_py.ALEInterface()
        self.viewer = None

        # Tune (or disable) ALE's action repeat:
        # https://github.com/openai/gym/issues/349
        assert isinstance(repeat_action_probability, (float, int)), \
                "Invalid repeat_action_probability: {!r}".format(repeat_action_probability)
        self.ale.setFloat(
                'repeat_action_probability'.encode('utf-8'),
                repeat_action_probability)

        self.seed()

        self._action_set = (self.ale.getLegalActionSet() if full_action_space
                            else self.ale.getMinimalActionSet())
        self.action_space = spaces.Discrete(len(self._action_set))

        (screen_width, screen_height) = self.ale.getScreenDims()
        if self._obs_type == 'ram':
            self.observation_space = spaces.Box(low=0, high=255, dtype=np.uint8, shape=(128,))
        elif self._obs_type == 'image':
            self.observation_space = spaces.Box(low=0, high=255, shape=(screen_height, screen_width, 3), dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self._obs_type))
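The docstring's frameskip semantics are typically consumed in `step()` along these lines (a sketch, not the full ALE stepping loop):

    # sketch: an int frameskip is used as-is; a tuple is sampled per step,
    # with the upper bound excluded (numpy randint convention)
    if isinstance(self.frameskip, int):
        num_steps = self.frameskip
    else:
        num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])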
Example #5
    def __init__(
        self, model_path, target_position, target_rotation,
        target_position_range, reward_type, initial_qpos={},
        randomize_initial_position=True, randomize_initial_rotation=True,
        distance_threshold=0.01, rotation_threshold=0.1, n_substeps=20, relative_control=False,
        ignore_z_target_rotation=False, touch_visualisation="on_touch", touch_get_obs="sensordata",
            obs_type='original',
    ):
        """Initializes a new Hand manipulation environment with touch sensors.

        Args:
            touch_visualisation (string): how touch sensor sites are visualised
                - "on_touch": shows touch sensor sites only when touch values > 0
                - "always": always shows touch sensor sites
                - "off" or else: does not show touch sensor sites
            touch_get_obs (string): touch sensor readings
                - "boolean": returns 1 if touch sensor reading != 0.0 else 0
                - "sensordata": returns original touch sensor readings from self.sim.data.sensordata[id]
                - "log": returns log(x+1) touch sensor readings from self.sim.data.sensordata[id]
                - "off" or else: does not add touch sensor readings to the observation

        """
        self.obs_type = obs_type
        self.touch_visualisation = touch_visualisation
        self.touch_get_obs = touch_get_obs
        self._touch_sensor_id_site_id = []
        self._touch_sensor_id = []
        self.touch_color = [1, 0, 0, 0.5]
        self.notouch_color = [0, 0.5, 0, 0.2]

        manipulate.ManipulateEnv.__init__(
            self, model_path, target_position, target_rotation,
            target_position_range, reward_type, initial_qpos=initial_qpos,
            randomize_initial_position=randomize_initial_position, randomize_initial_rotation=randomize_initial_rotation,
            distance_threshold=distance_threshold, rotation_threshold=rotation_threshold, n_substeps=n_substeps, relative_control=relative_control,
            ignore_z_target_rotation=ignore_z_target_rotation,
        )

        for k, v in self.sim.model._sensor_name2id.items():  # get touch sensor site names and their ids
            if 'robot0:TS_' in k:
                self._touch_sensor_id_site_id.append((v, self.sim.model._site_name2id[k.replace('robot0:TS_', 'robot0:T_')]))
                self._touch_sensor_id.append(v)

        if self.touch_visualisation == 'off':  # set touch sensors rgba values
            for _, site_id in self._touch_sensor_id_site_id:
                self.sim.model.site_rgba[site_id][3] = 0.0
        elif self.touch_visualisation == 'always':
            pass

        obs = self._get_obs()
        self.observation_space = spaces.Dict(dict(
            desired_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
            achieved_goal=spaces.Box(-np.inf, np.inf, shape=obs['achieved_goal'].shape, dtype='float32'),
            observation=spaces.Box(-np.inf, np.inf, shape=obs['observation'].shape, dtype='float32'),
        ))
Example #6
    def __init__(self, max_episode_steps=None):
        self.object_radius = 0.03
        self._max_episode_steps = max_episode_steps
        self._elapsed_steps = 0
        high = np.array([0.1, 0.1])
        self.low = -high[0]
        self.high = high[0]
        self.action_space = spaces.Box(low=-high, high=high)
        utils.EzPickle.__init__(self)
        fullpath = os.path.join(os.path.dirname(__file__), "assets",
                                'pusher_plane_straight.xml')
        mujoco_env.MujocoEnv.__init__(self, fullpath, 2)
        # MujocoEnv.__init__ overwrites the action space, so set it again
        self.action_space = spaces.Box(low=-high, high=high)
        self.cam_name = 'top_cam'
        self.frame_skip = 5
        self.object_start = 8
Example #7
    def __init__(self):
        self.seed()
        self.viewer = None

        self.observation_space = spaces.Box(0, 255, (FIELD_H,FIELD_W,3), dtype=np.uint8)
        self.action_space = spaces.Discrete(3)

        self.reset()
Example #8
    def __init__(self):
        self.viewer = None
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.state = None
        self.seed()
Example #9
    def __init__(self, env):
        super(FlattenObservation, self).__init__(env)

        flatdim = spaces.flatdim(env.observation_space)
        self.observation_space = spaces.Box(low=-float('inf'),
                                            high=float('inf'),
                                            shape=(flatdim, ),
                                            dtype=np.float32)
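`spaces.flatdim` returns the length of the flattened observation, summed across sub-spaces; a quick sketch:

    import numpy as np
    from gym import spaces

    space = spaces.Dict({
        'position': spaces.Box(-1, 1, shape=(3, ), dtype=np.float32),
        'velocity': spaces.Box(-1, 1, shape=(2, ), dtype=np.float32),
    })
    assert spaces.flatdim(space) == 5  # 3 + 2 entries once flattened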
Example #10
    def __init__(self, model_path, initial_qpos, n_actions, n_substeps):
        if model_path.startswith('/'):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), 'assets',
                                    model_path)
        if not os.path.exists(fullpath):
            raise IOError('File {} does not exist'.format(fullpath))

        model = mujoco_py.load_model_from_path(fullpath)
        self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
        self.viewer = None
        self._viewers = {}

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }

        self.seed()
        self._env_setup(initial_qpos=initial_qpos)
        self.initial_state = copy.deepcopy(self.sim.get_state())

        self.goal = self._sample_goal()
        obs = self._get_obs()
        self.action_space = spaces.Box(-1.,
                                       1.,
                                       shape=(n_actions, ),
                                       dtype='float32')
        self.observation_space = spaces.Dict(
            dict(
                desired_goal=spaces.Box(-np.inf,
                                        np.inf,
                                        shape=obs['achieved_goal'].shape,
                                        dtype='float32'),
                achieved_goal=spaces.Box(-np.inf,
                                         np.inf,
                                         shape=obs['achieved_goal'].shape,
                                         dtype='float32'),
                observation=spaces.Box(-np.inf,
                                       np.inf,
                                       shape=obs['observation'].shape,
                                       dtype='float32'),
            ))
Example #11
    def __init__(self, venv, nstack):
        self.venv = venv
        self.nstack = nstack
        wos = venv.observation_space  # wrapped ob space
        low = np.repeat(wos.low, self.nstack, axis=-1)
        high = np.repeat(wos.high, self.nstack, axis=-1)
        self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
        observation_space = spaces.Box(low=low,
                                       high=high,
                                       dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
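`np.repeat` along the last axis is what grows the bound arrays by `nstack`; a sketch with a hypothetical per-env bound:

    import numpy as np

    low = np.full((4, ), -1.0)        # hypothetical per-frame observation lower bound
    np.repeat(low, 4, axis=-1).shape  # -> (16,): bounds repeated for 4 stacked frames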
Example #12
def convert_observation_to_space(observation):
    if isinstance(observation, dict):
        space = spaces.Dict(
            OrderedDict([(key, convert_observation_to_space(value))
                         for key, value in observation.items()]))
    elif isinstance(observation, np.ndarray):
        low = np.full(observation.shape, -float('inf'))
        high = np.full(observation.shape, float('inf'))
        space = spaces.Box(low, high, dtype=observation.dtype)
    else:
        raise NotImplementedError(type(observation), observation)

    return space
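A usage sketch for the helper above, with a hypothetical nested observation (all Box bounds become +/- inf):

    import numpy as np

    obs = {'position': np.zeros(3), 'sensors': {'touch': np.zeros(2)}}
    space = convert_observation_to_space(obs)
    # -> Dict(position: Box(3,), sensors: Dict(touch: Box(2,)))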
Example #13
    def __init__(self):
        self.seed()
        self.viewer = None
        self.observation_space = spaces.Box(0,
                                            255, (FIELD_H, FIELD_W, 3),
                                            dtype=np.uint8)
        self.action_space = spaces.Discrete(10)
        self.bogus_mnist = np.zeros((10, 6, 6), dtype=np.uint8)
        for digit in range(10):
            for y in range(6):
                self.bogus_mnist[digit, y, :] = [
                    ord(char) for char in bogus_mnist[digit][y]
                ]
        self.reset()
Example #14
    def __init__(self, model_path, frame_skip):

        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self._seed()

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': int(np.round(1.0 / self.dt))
        }
        self.mujoco_render_frames = False

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf*np.ones(self.obs_dim)
        low = -high
        self.observation_space = spaces.Box(low, high)
        self.sim.forward()
Example #15
    def __init__(self, verbose=1):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)
Example #16
    def __init__(self):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 2000  # Action space bounds

        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        self.number = 0
        self.guess_count = 0
        self.guess_max = 200
        self.observation = 0

        self.seed()
        self.reset()
Example #17
    def __init__(self):
        self.range = 1000  # Randomly selected number is within +/- this value
        self.bounds = 10000

        self.action_space = spaces.Box(low=np.array([-self.bounds]),
                                       high=np.array([self.bounds]),
                                       dtype=np.float32)
        self.observation_space = spaces.Discrete(4)

        self.number = 0
        self.guess_count = 0
        self.guess_max = 200
        self.observation = 0

        self.seed()
        self.reset()
Example #18
    def __init__(self, env, k):
        """Stack k last frames.

        Returns lazy array, which is much more memory efficient.

        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(shp[:-1] + (shp[-1] * k, )),
                                            dtype=env.observation_space.dtype)
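The shape arithmetic multiplies only the last (channel) dimension by `k`; for a hypothetical (84, 84, 1) frame:

    shp = (84, 84, 1)           # hypothetical single-frame observation shape
    k = 4
    shp[:-1] + (shp[-1] * k, )  # -> (84, 84, 4)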
Example #19
    def __init__(self,
                 initial_wealth=25.0,
                 edge=0.6,
                 max_wealth=250.0,
                 max_rounds=300):

        self.action_space = spaces.Discrete(
            int(max_wealth * 100))  # betting in penny increments
        self.observation_space = spaces.Tuple((
            spaces.Box(0, max_wealth, [1], dtype=np.float32),  # (w,b)
            spaces.Discrete(max_rounds + 1)))
        self.reward_range = (0, max_wealth)
        self.edge = edge
        self.wealth = initial_wealth
        self.initial_wealth = initial_wealth
        self.max_rounds = max_rounds
        self.max_wealth = max_wealth
        self.np_random = None
        self.rounds = None
        self.seed()
        self.reset()
Example #20
    def __init__(self):
        self.action_space = spaces.Box(
            shape=(1, ), low=-1, high=1, dtype=np.float32)
Example #21
    def __init__(self, *args, **kwargs):
        self.observation_space = spaces.Dict({
            'state': spaces.Box(shape=(2, ), low=-1, high=1, dtype=np.float32),
        })
        super(FakeDictObservationEnvironment, self).__init__(*args, **kwargs)
Example #22
    def _set_action_space(self):
        bounds = self.model.actuator_ctrlrange.copy()
        low, high = bounds.T
        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
        return self.action_space
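`bounds.T` turns the (nu, 2) control-range matrix into two rows, so tuple unpacking yields the per-actuator vectors; a sketch with hypothetical ranges:

    import numpy as np

    bounds = np.array([[-1.0, 1.0],   # hypothetical actuator_ctrlrange, shape (nu, 2)
                       [-0.5, 0.5]])
    low, high = bounds.T              # low = [-1. , -0.5], high = [1. , 0.5]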
Example #23
    def __init__(self,
                 env,
                 pixels_only=True,
                 render_kwargs=None,
                 pixel_keys=('pixels', )):
        """Initializes a new pixel Wrapper.

        Args:
            env: The environment to wrap.
            pixels_only: If `True` (default), the original observation returned
                by the wrapped environment will be discarded, and a dictionary
                observation will only include pixels. If `False`, the
                observation dictionary will contain both the original
                observations and the pixel observations.
            render_kwargs: Optional `dict` containing keyword arguments passed
                to the `self.render` method.
            pixel_keys: Optional tuple of strings specifying the pixel
                observations' keys in the `OrderedDict` of observations.
                Defaults to ('pixels',).

        Raises:
            ValueError: If `env`'s observation spec is not compatible with the
                wrapper. Supported formats are a single array, or a dict of
                arrays.
            ValueError: If `env`'s observation already contains any of the
                specified `pixel_keys`.
        """

        super(PixelObservationWrapper, self).__init__(env)

        if render_kwargs is None:
            render_kwargs = {}

        for key in pixel_keys:
            render_kwargs.setdefault(key, {})

            render_mode = render_kwargs[key].pop('mode', 'rgb_array')
            assert render_mode == 'rgb_array', render_mode
            render_kwargs[key]['mode'] = 'rgb_array'

        wrapped_observation_space = env.observation_space

        if isinstance(wrapped_observation_space, spaces.Box):
            self._observation_is_dict = False
            invalid_keys = set([STATE_KEY])
        elif isinstance(wrapped_observation_space,
                        (spaces.Dict, collections.abc.MutableMapping)):
            self._observation_is_dict = True
            invalid_keys = set(wrapped_observation_space.spaces.keys())
        else:
            raise ValueError("Unsupported observation space structure.")

        if not pixels_only:
            # Make sure that no keys in `pixel_keys` overlap with
            # the wrapped observation's keys.
            overlapping_keys = set(pixel_keys) & set(invalid_keys)
            if overlapping_keys:
                raise ValueError("Duplicate or reserved pixel keys {!r}."
                                 .format(overlapping_keys))

        if pixels_only:
            self.observation_space = spaces.Dict()
        elif self._observation_is_dict:
            self.observation_space = copy.deepcopy(wrapped_observation_space)
        else:
            self.observation_space = spaces.Dict()
            self.observation_space.spaces[STATE_KEY] = wrapped_observation_space

        # Extend observation space with pixels.

        pixels_spaces = {}
        for pixel_key in pixel_keys:
            pixels = self.env.render(**render_kwargs)

            if np.issubdtype(pixels.dtype, np.integer):
                low, high = (0, 255)
            elif np.issubdtype(pixels.dtype, np.floating):
                low, high = (-float('inf'), float('inf'))
            else:
                raise TypeError(pixels.dtype)

            pixels_space = spaces.Box(
                shape=pixels.shape, low=low, high=high, dtype=pixels.dtype)
            pixels_spaces[pixel_key] = pixels_space

        self.observation_space.spaces.update(pixels_spaces)

        self._env = env
        self._pixels_only = pixels_only
        self._render_kwargs = render_kwargs
        self._pixel_keys = pixel_keys
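A hypothetical usage sketch, assuming the complete wrapper class (including its `observation()` method) and an env whose `render(mode='rgb_array')` returns frames:

    import gym

    env = PixelObservationWrapper(gym.make('CartPole-v0'), pixels_only=False)
    obs = env.reset()
    obs[STATE_KEY].shape  # original observation, under the reserved state key
    obs['pixels'].shape   # rendered frame, e.g. (400, 600, 3) uint8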