Example #1
    def __init__(self):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        # environment parameters
        self.max_speed = 8.
        self.max_torque = 2.
        self.dt = 0.5
        self.gravity = 10.
        self.mass = 1.
        self.length = 1.

        # rendering info
        self.set_clipping_area((-2.2, 2.2, -2.2, 2.2))
        self.set_refresh_interval(10)  # in milliseconds

        # observation and action spaces
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        low = -high
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=low,
                                            high=high,
                                            dtype=np.float32)

        # initialize
        self.reset()
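
A minimal rollout sketch for the constructor above; the class name Pendulum and the gym-style step() signature are assumptions, since only __init__ is shown:

env = Pendulum()                                     # class name assumed
obs = env.reset()
for _ in range(100):
    action = env.action_space.sample()               # random torque in [-2, 2]
    obs, reward, done, info = env.step(action)       # gym-style API (assumed)
    if done:
        obs = env.reset()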
Example #2
    def __init__(self, goal_velocity=0):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high)

        self.reward_range = (0.0, 1.0)

        # rendering info
        self.set_clipping_area((-1.2, 0.6, -0.2, 1.1))
        self.set_refresh_interval(10)  # in milliseconds

        # initial reset
        self.reset()
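
The force and gravity constants are consumed by the step dynamics. A sketch of the classic MountainCar update, assumed here since step() is not shown (action 0 pushes left, 1 coasts, 2 pushes right):

import numpy as np

position, velocity = -0.5, 0.0                       # example state
action = 2                                           # push right
velocity += (action - 1) * 0.001 + np.cos(3 * position) * (-0.0025)
velocity = np.clip(velocity, -0.07, 0.07)            # clamp to max_speed
position = np.clip(position + velocity, -1.2, 0.6)   # clamp to position bounds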
Example #3
    def __init__(self,
                 reward_free=False,
                 difficulty=0,
                 array_observation=False):
        self.reward_free = reward_free
        self.difficulty = difficulty
        self.array_observation = array_observation

        if difficulty not in [0, 1, 2]:
            raise ValueError("FourRoom difficulty must be in [0, 1, 2]")

        # Common parameters
        nrows = 9
        ncols = 9
        start_coord = (0, 0)
        terminal_states = ((8, 0), )
        success_probability = 0.95
        #
        walls = ()
        for ii in range(9):
            if ii not in [2, 6]:
                walls += ((ii, 4), )
        for jj in range(9):
            if jj != 7:
                walls += ((4, jj), )

        # Default reward according to the difficulty
        if difficulty in [0, 1]:
            default_reward = 0.0
        elif difficulty == 2:
            default_reward = -0.005

        # Rewards according to the difficulty
        if self.reward_free:
            reward_at = {}
        else:
            if difficulty == 0:
                reward_at = {(8, 0): 1.0}
            elif difficulty in [1, 2]:
                reward_at = {
                    (8, 0): 1.0,
                    (3, 3): 0.1,
                }

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=default_reward,
        )

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
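
The two wall loops above build a vertical wall along column 4 (with door gaps at rows 2 and 6) and a horizontal wall along row 4 (with a door gap at column 7), which is what partitions the 9x9 grid into four rooms. A standalone sketch to print the layout:

walls = ()
for ii in range(9):
    if ii not in [2, 6]:
        walls += ((ii, 4),)
for jj in range(9):
    if jj != 7:
        walls += ((4, jj),)

for ii in range(9):
    print("".join("#" if (ii, jj) in walls else "." for jj in range(9)))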
Example #4
    def __init__(self, noise_room1=0.01, noise_room2=0.01):
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        self.noise_room1 = noise_room1
        self.noise_room2 = noise_room2

        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0]),
            high=np.array([2.0, 1.0]),
        )
        self.action_space = spaces.Discrete(4)
        self.reward_range = (0.0, 1.0)

        self.room_noises = [noise_room1, noise_room2]

        # environment parameters
        self.action_displacement = 0.1
        self.wall_eps = 0.05

        # base reward position
        self.base_reward_pos = np.array([0.8, 0.8])

        # rendering info
        self.set_clipping_area((0, 2, 0, 1))
        self.set_refresh_interval(100)  # in milliseconds
        self.renderer_type = "opengl"

        # reset
        self.reset()
Example #5
    def __init__(self, reward_free=False, array_observation=False):
        self.reward_free = reward_free
        self.array_observation = array_observation

        # Common parameters
        nrows = 13
        ncols = 17
        start_coord = (5, 1)
        terminal_states = ((7, 7),)
        success_probability = 0.95
        #
        walls = ()
        for ii in range(13):
            walls += ((ii, 0),)
            walls += ((ii, 16),)
        for jj in range(17):
            walls += ((0, jj),)
            walls += ((12, jj),)
        for ii in range(13):
            if ii not in [1, 11]:
                walls += ((ii, 6),)
                walls += ((ii, 10),)
        walls += ((11, 6),)
        for jj in range(17):
            if jj not in [1, 15]:
                walls += ((6, jj),)

        # Default reward
        default_reward = 0.0

        # Rewards
        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                (7, 7): 10.0,
                (8, 2): 1.0,
                (10, 3): 1.0,
            }
            for jj in range(7, 16):
                for ii in range(1, 12):
                    if (ii, jj) not in walls and (ii, jj) != (7, 7):
                        reward_at[(ii, jj)] = -0.05

        # Init base class
        GridWorld.__init__(self,
                           nrows=nrows,
                           ncols=ncols,
                           start_coord=start_coord,
                           terminal_states=terminal_states,
                           success_probability=success_probability,
                           reward_at=reward_at,
                           walls=walls,
                           default_reward=default_reward)

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
Example #6
    def __init__(self, reward_free=False, array_observation=False):
        self.reward_free = reward_free
        self.array_observation = array_observation

        # Common parameters
        nrows = 11
        ncols = 17
        start_coord = (0, 0)
        terminal_states = ((10, 0), )
        success_probability = 0.95
        #
        walls = ()
        for ii in range(11):
            if ii not in [2, 8]:
                walls += ((ii, 5), )
                walls += ((ii, 11), )
        for jj in range(17):
            if jj != 15:
                walls += ((5, jj), )

        # Default reward
        default_reward = -0.001

        # Rewards
        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                (10, 0): 10.0,
                (4, 4): 0.1,
            }

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=default_reward,
        )

        # spaces
        if self.array_observation:
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2, ))
Example #7
    def __init__(self):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)
        self.reward_range = (-1.0, 0.0)

        # rendering info
        bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2
        # (left, right, bottom, top)
        self.set_clipping_area((-bound, bound, -bound, bound))
        self.set_refresh_interval(10)  # in milliseconds

        # observation and action spaces
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low=low, high=high)
        self.action_space = spaces.Discrete(3)

        # initialize
        self.state = None
        self.reset()
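
The constructor above relies on class-level constants (LINK_LENGTH_1, LINK_LENGTH_2, MAX_VEL_1, MAX_VEL_2) that are not shown. In the classic Acrobot they are defined as follows (an assumption about this particular class):

import numpy as np

LINK_LENGTH_1 = 1.0      # length of link 1 [m]
LINK_LENGTH_2 = 1.0      # length of link 2 [m]
MAX_VEL_1 = 4 * np.pi    # max angular velocity of joint 1 [rad/s]
MAX_VEL_2 = 9 * np.pi    # max angular velocity of joint 2 [rad/s]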
Example #8
    def __init__(self, p, action_list, reward_amplitudes, reward_smoothness,
                 reward_centers, A, B, sigma, sigma_init, mu_init):
        """
        Parameters
        -----------
        p : int
            parameter of the p-norm
        action_list : list
            list of actions {u_1, ..., u_m}, each action u_i is a
            d'-dimensional array
        reward_amplitudes : list
            list of reward amplitudes: {b_1, ..., b_n}
        reward_smoothness : list
            list of reward smoothness: {c_1, ..., c_n}
        reward_centers : list
            list of reward centers:    {x_1, ..., x_n}
        A : numpy.ndarray
            array A of size (d, d)
        B : numpy.ndarray
            array B of size (d, d')
        sigma : double
            transition noise sigma
        sigma_init : double
            initial state noise sigma_init
        mu_init : numpy.ndarray
            array of size (d,) containing the mean of the initial state
        """
        Model.__init__(self)

        assert p >= 1, "PBall requires p>=1"
        if p not in [2, np.inf]:
            logger.warning(
                "For p other than 2 and np.inf, PBall does not make "
                "true projections onto the lp ball."
            )
        self.p = p
        self.d, self.dp = B.shape  # d and d'
        self.m = len(action_list)
        self.action_list = action_list
        self.reward_amplitudes = reward_amplitudes
        self.reward_smoothness = reward_smoothness
        self.reward_centers = reward_centers
        self.A = A
        self.B = B
        self.sigma = sigma
        self.sigma_init = sigma_init
        self.mu_init = mu_init

        # State and action spaces
        low = -1.0 * np.ones(self.d, dtype=np.float64)
        high = np.ones(self.d, dtype=np.float64)
        self.observation_space = spaces.Box(low, high)
        self.action_space = spaces.Discrete(self.m)

        # reward range
        assert len(self.reward_amplitudes) == len(self.reward_smoothness)
        assert len(self.reward_amplitudes) == len(self.reward_centers)
        if len(self.reward_amplitudes) > 0:
            assert self.reward_amplitudes.max() <= 1.0 and \
                self.reward_amplitudes.min() >= 0.0, \
                "reward amplitudes b_i must be in [0, 1]"
            assert self.reward_smoothness.min() > 0.0, \
                "reward smoothness c_i must be > 0"
        self.reward_range = (0, 1.0)

        #
        self.name = "Lp-Ball"

        # Initialize state
        self.reset()
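
Putting the docstring together, a hypothetical construction of a 2-dimensional instance (the class name PBall is borrowed from self.name above; note that the asserts call .max() and .min(), so the reward parameters must be numpy arrays rather than plain lists):

import numpy as np

d, dp = 2, 2
action_list = [0.05 * np.array([1.0, 0.0]),
               0.05 * np.array([-1.0, 0.0]),
               0.05 * np.array([0.0, 1.0]),
               0.05 * np.array([0.0, -1.0])]
env = PBall(                                  # class name assumed
    p=2,
    action_list=action_list,
    reward_amplitudes=np.array([1.0]),        # b_1 in [0, 1]
    reward_smoothness=np.array([0.25]),       # c_1 > 0
    reward_centers=[np.array([0.75, 0.0])],   # x_1 inside the unit ball
    A=np.eye(d),                              # (d, d)
    B=np.eye(d, dp),                          # (d, d')
    sigma=0.01,
    sigma_init=0.001,
    mu_init=np.zeros(d),
)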
Example #9
    def __init__(
        self,
        nrooms=7,
        reward_free=False,
        array_observation=False,
        room_size=5,
        success_probability=0.95,
        remove_walls=False,
        initial_state_distribution="center",
        include_traps=False,
    ):

        assert nrooms > 0, "nrooms must be > 0"
        assert initial_state_distribution in ("center", "uniform")

        self.reward_free = reward_free
        self.array_observation = array_observation
        self.nrooms = nrooms
        self.room_size = room_size
        self.success_probability = success_probability
        self.remove_walls = remove_walls
        self.initial_state_distribution = initial_state_distribution
        self.include_traps = include_traps

        # Max number of rooms/columns per row
        self.max_rooms_per_row = 5

        # Grid size
        self.room_nrows = math.ceil(nrooms / self.max_rooms_per_row)
        if self.room_nrows > 1:
            self.room_ncols = self.max_rooms_per_row
        else:
            self.room_ncols = nrooms
        nrows = self.room_size * self.room_nrows + (self.room_nrows - 1)
        ncols = self.room_size * self.room_ncols + (self.room_ncols - 1)

        # walls
        walls = []
        for room_col in range(self.room_ncols - 1):
            col = (room_col + 1) * (self.room_size + 1) - 1
            for jj in range(nrows):
                if (jj % (self.room_size + 1)) != (self.room_size // 2):
                    walls.append((jj, col))

        for room_row in range(self.room_nrows - 1):
            row = (room_row + 1) * (self.room_size + 1) - 1
            for jj in range(ncols):
                walls.append((row, jj))

        # process each room
        start_coord = None
        terminal_state = None
        self.traps = []
        count = 0
        for room_r in range(self.room_nrows):
            if room_r % 2 == 0:
                cols_iterator = range(self.room_ncols)
            else:
                cols_iterator = reversed(range(self.room_ncols))
            for room_c in cols_iterator:
                # existing rooms
                if count < self.nrooms:
                    # remove top wall
                    if ((room_c == self.room_ncols - 1) and (room_r % 2 == 0)) or (
                        (room_c == 0) and (room_r % 2 == 1)
                    ):
                        if room_r != self.room_nrows - 1:
                            wall_to_remove = self._convert_room_coord_to_global(
                                room_r, room_c, self.room_size, self.room_size // 2
                            )
                            if wall_to_remove in walls:
                                walls.remove(wall_to_remove)
                # rooms to remove
                else:
                    for ii in range(-1, self.room_size + 1):
                        for jj in range(-1, self.room_size + 1):
                            wall_to_include = self._convert_room_coord_to_global(
                                room_r, room_c, ii, jj
                            )
                            if (
                                wall_to_include[0] >= 0
                                and wall_to_include[0] < nrows
                                and wall_to_include[1] >= 0
                                and wall_to_include[1] < ncols
                                and (wall_to_include not in walls)
                            ):
                                walls.append(wall_to_include)

                # start coord
                if count == nrooms // 2:
                    start_coord = self._convert_room_coord_to_global(
                        room_r, room_c, self.room_size // 2, self.room_size // 2
                    )
                # terminal state
                if count == nrooms - 1:
                    terminal_state = self._convert_room_coord_to_global(
                        room_r, room_c, self.room_size // 2, self.room_size // 2
                    )
                # trap
                if include_traps:
                    self.traps.append(
                        self._convert_room_coord_to_global(
                            room_r,
                            room_c,
                            self.room_size // 2 + 1,
                            self.room_size // 2 + 1,
                        )
                    )
                count += 1

        terminal_states = (terminal_state,) + tuple(self.traps)

        if self.reward_free:
            reward_at = {}
        else:
            reward_at = {
                terminal_state: 1.0,
                start_coord: 0.01,
                (self.room_size // 2, self.room_size // 2): 0.1,
            }

        # Check remove_walls
        if remove_walls:
            walls = ()

        # Init base class
        GridWorld.__init__(
            self,
            nrows=nrows,
            ncols=ncols,
            start_coord=start_coord,
            terminal_states=terminal_states,
            success_probability=success_probability,
            reward_at=reward_at,
            walls=walls,
            default_reward=0.0,
        )

        # Check initial distribution
        if initial_state_distribution == "uniform":
            distr = np.ones(self.observation_space.n) / self.observation_space.n
            self.set_initial_state_distribution(distr)

        # spaces
        if self.array_observation:
            self.discrete_observation_space = self.observation_space
            self.observation_space = spaces.Box(0.0, 1.0, shape=(2,))
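
A worked example of the grid-size arithmetic above, using the defaults nrooms=7, room_size=5 and max_rooms_per_row=5:

import math

nrooms, room_size, max_rooms_per_row = 7, 5, 5
room_nrows = math.ceil(nrooms / max_rooms_per_row)             # 2 rows of rooms
room_ncols = max_rooms_per_row if room_nrows > 1 else nrooms   # 5 columns of rooms
nrows = room_size * room_nrows + (room_nrows - 1)              # 5 * 2 + 1 = 11
ncols = room_size * room_ncols + (room_ncols - 1)              # 5 * 5 + 4 = 29
print(nrows, ncols)                                            # 11 29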