Example #1
    def __init__(self, goal_velocity=0):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high)

        self.reward_range = (0.0, 1.0)

        # rendering info
        self.set_clipping_area((-1.2, 0.6, -0.2, 1.1))
        self.set_refresh_interval(10)  # in milliseconds

        # initial reset
        self.reset()
Example #2
    def __init__(self):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)

        # environment parameters
        self.max_speed = 8.
        self.max_torque = 2.
        self.dt = 0.5
        self.gravity = 10.
        self.mass = 1.
        self.length = 1.

        # rendering info
        self.set_clipping_area((-2.2, 2.2, -2.2, 2.2))
        self.set_refresh_interval(10)

        # observation and action spaces
        high = np.array([1., 1., self.max_speed], dtype=np.float32)
        low = -high
        self.action_space = spaces.Box(low=-self.max_torque,
                                       high=self.max_torque,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=low,
                                            high=high,
                                            dtype=np.float32)

        # initialize
        self.reset()
Example #3
    def __init__(self, env):
        # Init base class
        Model.__init__(self)

        # Save reference to env
        self.env = env

        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.reward_range = self.env.reward_range

        # If gym environment, reseeding is necessary here for
        # reproducibility.
        if isinstance(env, gym.Env):
            self.reseed()
Example #4
    def __init__(self, env):
        # Init base class
        Model.__init__(self)

        # Save reference to env
        self.env = env

        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        self.metadata = self.env.metadata

        try:
            self.reward_range = self.env.reward_range
        except AttributeError:
            self.reward_range = (-np.inf, np.inf)

        # If gym environment, reseeding is necessary here for
        # reproducibility.
        if isinstance(env, gym.Env):
            self.reseed()
Example #5
    def __init__(self, env, wrap_spaces=False):
        # Init base class
        Model.__init__(self)

        # Save reference to env
        self.env = env
        self.metadata = self.env.metadata

        if wrap_spaces:
            self.observation_space = convert_space_from_gym(
                self.env.observation_space)
            self.action_space = convert_space_from_gym(self.env.action_space)
        else:
            self.observation_space = self.env.observation_space
            self.action_space = self.env.action_space

        try:
            self.reward_range = self.env.reward_range
        except AttributeError:
            self.reward_range = (-np.inf, np.inf)
Example #6
    def __init__(self):
        # init base classes
        Model.__init__(self)
        RenderInterface2D.__init__(self)
        self.reward_range = (-1.0, 0.0)

        # rendering info
        bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2
        # (left, right, bottom, top)
        self.set_clipping_area((-bound, bound, -bound, bound))
        self.set_refresh_interval(10)  # in milliseconds

        # observation and action spaces
        high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
        low = -high
        self.observation_space = spaces.Box(low=low, high=high)
        self.action_space = spaces.Discrete(3)

        # initialize
        self.state = None
        self.reset()
Example #7
    def __init__(self, R, P, initial_state_distribution=0):
        Model.__init__(self)
        self.initial_state_distribution = initial_state_distribution
        S, A = R.shape  # number of states (S) and actions (A)

        self.S = S
        self.A = A

        self.R = R  # reward array, shape (S, A)
        self.P = P  # transition probability array

        self.observation_space = spaces.Discrete(S)
        self.action_space = spaces.Discrete(A)
        self.reward_range = (self.R.min(), self.R.max())

        self.state = None

        self._states = np.arange(S)
        self._actions = np.arange(A)

        self.reset()
        self._check()
Example #8
    def __init__(self, p, action_list, reward_amplitudes, reward_smoothness,
                 reward_centers, A, B, sigma, sigma_init, mu_init):
        """
        Parameters
        -----------
        p : int
            parameter of the p-norm
        action_list : list
            list of actions {u_1, ..., u_m}, each action u_i is a
            d'-dimensional array
        reward_amplitudes: list
            list of reward amplitudes: {b_1, ..., b_n}
        reward_smoothness : list
            list of reward smoothness: {c_1, ..., c_n}
        reward_centers : list
            list of reward centers:    {x_1, ..., x_n}
        A : numpy.ndarray
            array A of size (d, d)
        B : numpy.ndarray
            array B of size (d, d')
        sigma : double
            transition noise sigma
        sigma_init : double
            initial state noise sigma_init
        mu_init : numpy.ndarray
            array of size (d,) containing the mean of the initial state
        """
        Model.__init__(self)

        assert p >= 1, "PBall requires p>=1"
        if p not in [2, np.inf]:
            logger.warning("For p != 2 and p != np.inf, PBall does not "
                           "make true projections onto the lp ball.")
        self.p = p
        self.d, self.dp = B.shape  # d and d'
        self.m = len(action_list)
        self.action_list = action_list
        self.reward_amplitudes = reward_amplitudes
        self.reward_smoothness = reward_smoothness
        self.reward_centers = reward_centers
        self.A = A
        self.B = B
        self.sigma = sigma
        self.sigma_init = sigma_init
        self.mu_init = mu_init

        # State and action spaces
        low = -1.0 * np.ones(self.d, dtype=np.float64)
        high = np.ones(self.d, dtype=np.float64)
        self.observation_space = spaces.Box(low, high)
        self.action_space = spaces.Discrete(self.m)

        # reward range
        assert len(self.reward_amplitudes) == len(self.reward_smoothness)
        assert len(self.reward_amplitudes) == len(self.reward_centers)
        if len(self.reward_amplitudes) > 0:
            assert self.reward_amplitudes.max() <= 1.0 and \
                self.reward_amplitudes.min() >= 0.0, \
                "reward amplitudes b_i must be in [0, 1]"
            assert self.reward_smoothness.min() > 0.0, \
                "reward smoothness c_i must be > 0"
        self.reward_range = (0, 1.0)

        #
        self.name = "Lp-Ball"

        # Initialize state
        self.reset()
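
The docstring in Example #8 spells out how the constructor arguments must fit together: A is (d, d), B is (d, d') (and d, d' are read from B.shape), each action is a d'-dimensional vector, and the three reward arrays share the same length n. Below is a minimal sketch, using only NumPy, that builds one mutually consistent set of arguments for d = 2, d' = 1; the class name PBall in the commented call is hypothetical, since the class that defines this __init__ is not shown here.

    import numpy as np

    # One consistent set of constructor arguments (d = 2, d' = 1, two actions,
    # a single reward bump), following the shapes given in the docstring.
    d, dp = 2, 1
    A = np.eye(d)                                      # (d, d)
    B = np.ones((d, dp))                               # (d, d'), so B.shape yields (d, d')
    action_list = [np.array([-1.0]), np.array([1.0])]  # each action u_i is d'-dimensional
    reward_amplitudes = np.array([1.0])                # b_i in [0, 1]
    reward_smoothness = np.array([0.25])               # c_i > 0
    reward_centers = [np.zeros(d)]                     # x_i, arrays of size (d,)
    sigma, sigma_init = 0.01, 0.1                      # transition / initial-state noise
    mu_init = np.zeros(d)                              # mean of the initial state, size (d,)

    # Hypothetical call (class name assumed, not shown above):
    # env = PBall(2, action_list, reward_amplitudes, reward_smoothness,
    #             reward_centers, A, B, sigma, sigma_init, mu_init)

With p = 2 the warning branch is skipped, and the assertions on amplitudes and smoothness pass since the amplitudes lie in [0, 1] and the smoothness values are positive.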
Example #9
    def __init__(self, rewards, **kwargs):
        Model.__init__(self, **kwargs)
        # rewards is array-like of shape (n_samples, n_arms): one column per arm
        rewards = np.asarray(rewards)
        self.n_arms = rewards.shape[1]
        self.rewards = deque(rewards)
        self.action_space = spaces.Discrete(self.n_arms)
Example #10
    def __init__(self, laws=(), **kwargs):
        Model.__init__(self, **kwargs)
        # laws: one reward distribution per arm
        self.laws = laws
        self.n_arms = len(self.laws)
        self.action_space = spaces.Discrete(self.n_arms)