class ShipSteering(Environment):
    """
    The Ship Steering environment as presented in:
    "Hierarchical Policy Gradient Algorithms". Ghavamzadeh M. and
    Mahadevan S.. 2013.

    A ship moves at constant speed on a square field and must pass
    through a gate; leaving the field is a failure.

    """
    def __init__(self, small=True, n_steps_action=3):
        """
        Constructor.

        Args:
            small (bool, True): whether to use a small state space or not.
            n_steps_action (int, 3): number of integration intervals for
                each step of the mdp.

        """
        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size,
                         np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2

        # Gate endpoints depend on the chosen field size.
        if small:
            self._gate_s = np.array([100., 120.])
            self._gate_e = np.array([120., 100.])
        else:
            self._gate_s = np.array([350., 400.])
            self._gate_e = np.array([450., 400.])

        self._out_reward = -100
        self._success_reward = 0
        self._small = small
        self._state = None
        self.n_steps_action = n_steps_action

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size, self.field_size,
                              background=(66, 131, 237))

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset to the given state, or to the default/random initial one.

        """
        if state is not None:
            self._state = state
        elif self._small:
            # Fixed start: origin, heading up.
            self._state = np.zeros(4)
            self._state[2] = np.pi / 2
        else:
            # Uniform random start over the whole observation space.
            low = self.info.observation_space.low
            high = self.info.observation_space.high
            self._state = (high - low) * np.random.rand(4) + low

        return self._state

    def step(self, action):
        """
        Integrate the ship dynamics for ``n_steps_action`` sub-steps.

        Returns:
            The new state, the reward, the absorbing flag and an empty
            info dictionary.

        """
        desired_omega = self._bound(action[0], -self.omega_max,
                                    self.omega_max)

        current = self._state
        for _ in range(self.n_steps_action):
            previous = current
            current = np.empty(4)
            current[0] = previous[0] + self._v * np.cos(previous[2]) * self._dt
            current[1] = previous[1] + self._v * np.sin(previous[2]) * self._dt
            current[2] = normalize_angle(previous[2]
                                         + previous[3] * self._dt)
            current[3] = previous[3] \
                + (desired_omega - previous[3]) * self._dt / self._T

            inside = 0 <= current[0] <= self.field_size \
                and 0 <= current[1] <= self.field_size
            if not inside:
                # Left the field: clip position, punish, terminate.
                current[0] = self._bound(current[0], 0, self.field_size)
                current[1] = self._bound(current[1], 0, self.field_size)
                reward = self._out_reward
                absorbing = True
                break
            elif self._through_gate(previous[:2], current[:2]):
                reward = self._success_reward
                absorbing = True
                break
            else:
                reward = -1
                absorbing = False

        self._state = current

        return self._state, reward, absorbing, {}

    def render(self, mode='human'):
        """
        Draw the gate and the boat polygon.

        """
        self._viewer.line(self._gate_s, self._gate_e, width=3)

        boat = [[-4, -4], [-4, 4], [4, 4], [8, 0.0], [4, -4]]
        self._viewer.polygon(self._state[:2], self._state[2], boat,
                             color=(32, 193, 54))

        self._viewer.display(self._dt)

    def stop(self):
        self._viewer.close()

    def _through_gate(self, start, end):
        """
        Segment-segment intersection test between the gate and the
        motion segment, using 2d cross products.

        """
        gate_vec = self._gate_e - self._gate_s
        move_vec = end - start

        denom = self._cross_2d(vecr=gate_vec, vecs=move_vec)
        if denom == 0:
            # Parallel segments never intersect the gate.
            return False

        delta = start - self._gate_s
        t = self._cross_2d(delta, move_vec) / denom
        u = self._cross_2d(delta, gate_vec) / denom

        return 0 <= u <= 1 and 0 <= t <= 1

    @staticmethod
    def _cross_2d(vecr, vecs):
        # z-component of the 3d cross product of two planar vectors.
        return vecr[0] * vecs[1] - vecr[1] * vecs[0]
class CartPole(Environment):
    """
    The Inverted Pendulum on a Cart environment as presented in:
    "Least-Squares Policy Iteration". Lagoudakis M. G. and Parr R.. 2003.

    The action space is discrete: push left, do nothing, push right.
    Uniform noise is added to the applied force.

    """
    def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50.,
                 noise_u=10., horizon=3000, gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._alpha = 1 / (self._m + self._M)
        # NOTE(review): _mu is stored but never read by _dynamics — the
        # Lagoudakis & Parr dynamics below are frictionless. Kept for
        # interface compatibility; confirm whether friction is intended.
        self._mu = mu
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u
        high = np.array([np.inf, np.inf])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset to the given state, or to a small random angle around the
        upright position.

        """
        if state is None:
            angle = np.random.uniform(-np.pi / 8., np.pi / 8.)
            self._state = np.array([angle, 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])

        self._last_u = 0

        return self._state

    def step(self, action):
        """
        Apply the discrete action (0: -max_u, 1: 0, 2: +max_u), add
        uniform actuation noise, and integrate the dynamics for one dt.

        """
        if action == 0:
            u = -self._max_u
        elif action == 1:
            u = 0.
        else:
            u = self._max_u

        # Remember the noiseless force for rendering the arrow.
        self._last_u = u

        u += np.random.uniform(-self._noise_u, self._noise_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt],
                           (u,))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        # Episode fails when the pendulum falls past the horizontal.
        if np.abs(self._state[0]) > np.pi * .5:
            reward = -1.
            absorbing = True
        else:
            reward = 0.
            absorbing = False

        return self._state, reward, absorbing, {}

    def render(self, mode='human'):
        """
        Draw the cart, the pole and the applied-force arrow.

        """
        start = 1.25 * self._l * np.ones(2)
        end = 1.25 * self._l * np.ones(2)

        end[0] += self._l * np.sin(self._state[0])
        end[1] += self._l * np.cos(self._state[0])

        self._viewer.line(start, end)
        self._viewer.square(start, 0, self._l / 10)
        self._viewer.circle(end, self._l / 20)

        direction = -np.sign(self._last_u) * np.array([1, 0])
        value = np.abs(self._last_u)
        self._viewer.force_arrow(start, direction, value, self._max_u,
                                 self._l / 5)

        self._viewer.display(self._dt)

    def stop(self):
        self._viewer.close()

    def _dynamics(self, state, t, u):
        """
        Time derivative of (theta, omega) under input force ``u``.

        """
        theta = state[0]
        omega = state[1]

        d_theta = omega
        d_omega = (self._g * np.sin(theta)
                   - self._alpha * self._m * self._l * .5
                   * d_theta**2 * np.sin(2 * theta) * .5
                   - self._alpha * np.cos(theta) * u) / (
            2 / 3 * self._l
            - self._alpha * self._m * self._l * .5 * np.cos(theta)**2)

        return d_theta, d_omega
class Segway(Environment):
    """
    The Segway environment (continuous version) as presented in:
    "Deep Learning for Actor-Critic Reinforcement Learning". Xueli Jia. 2015.

    """
    def __init__(self, random_start=False):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random
                position or from the horizontal one.

        """
        # MDP parameters
        gamma = 0.97

        self._Mr = 0.3 * 2     # mass of the two wheels
        self._Mp = 2.55        # mass of the pendulum body
        self._Ip = 2.6e-2      # pendulum inertia
        self._Ir = 4.54e-4 * 2 # inertia of the two wheels
        self._l = 13.8e-2      # pendulum length
        self._r = 5.5e-2       # wheel radius
        self._dt = 1e-2
        self._g = 9.81
        self._max_u = 5

        self._random = random_start

        # Bug fix: the first bound was -np.pi / 2, which made
        # low[0] = pi/2 > high[0] = -pi/2 in the observation space.
        high = np.array([np.pi / 2, 15, 75])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._max_u]))
        horizon = 300
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(5 * self._l, 5 * self._l)
        self._last_x = 0

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset to the given state, or to a fixed/random tilt angle with
        zero velocities.

        """
        if state is None:
            if self._random:
                angle = np.random.uniform(-np.pi / 2, np.pi / 2)
            else:
                angle = -np.pi / 8

            self._state = np.array([angle, 0., 0.])
        else:
            self._state = state
            self._state[0] = normalize_angle(self._state[0])

        self._last_x = 0

        return self._state

    def step(self, action):
        """
        Apply the bounded torque and integrate the dynamics for one dt.
        The episode ends with a large penalty if the body falls past
        the horizontal; otherwise the reward is a negative quadratic
        cost on the state.

        """
        u = self._bound(action[0], -self._max_u, self._max_u)
        new_state = odeint(self._dynamics, self._state, [0, self._dt],
                           (u,))

        self._state = np.array(new_state[-1])
        self._state[0] = normalize_angle(self._state[0])

        if abs(self._state[0]) > np.pi / 2:
            absorbing = True
            reward = -10000
        else:
            absorbing = False
            Q = np.diag([3.0, 0.1, 0.1])

            x = self._state

            J = x.dot(Q).dot(x)

            reward = -J

        return self._state, reward, absorbing, {}

    def stop(self):
        # Added for consistency with the other environments.
        self._viewer.close()

    def _dynamics(self, state, t, u):
        """
        Time derivative of (alpha, d_alpha, wheel angular velocity)
        under input torque ``u``.

        """
        alpha = state[0]
        d_alpha = state[1]

        h1 = (self._Mr + self._Mp) * (self._r**2) + self._Ir
        h2 = self._Mp * self._r * self._l * np.cos(alpha)
        h3 = self._l**2 * self._Mp + self._Ip

        omegaP = d_alpha

        dOmegaP = -(h2 * self._l * self._Mp * self._r * np.sin(alpha)
                    * omegaP**2
                    - self._g * h1 * self._l * self._Mp * np.sin(alpha)
                    + (h2 + h1) * u) / (h1 * h3 - h2**2)
        dOmegaR = (h3 * self._l * self._Mp * self._r * np.sin(alpha)
                   * omegaP**2
                   - self._g * h2 * self._l * self._Mp * np.sin(alpha)
                   + (h3 + h2) * u) / (h1 * h3 - h2**2)

        dx = list()
        dx.append(omegaP)
        dx.append(dOmegaP)
        dx.append(dOmegaR)

        return dx

    def render(self, mode='human'):
        """
        Draw the wheel and the pendulum body, scrolling horizontally
        with the integrated wheel displacement.

        """
        start = 2.5 * self._l * np.ones(2)
        end = 2.5 * self._l * np.ones(2)

        dx = -self._state[2] * self._r * self._dt

        self._last_x += dx

        # Wrap the drawn position into the viewer width.
        if self._last_x > 2.5 * self._l or self._last_x < -2.5 * self._l:
            self._last_x = (2.5 * self._l + self._last_x) % (5 * self._l) \
                - 2.5 * self._l

        start[0] += self._last_x
        end[0] += -2 * self._l * np.sin(self._state[0]) + self._last_x
        end[1] += 2 * self._l * np.cos(self._state[0])

        if (start[0] > 5 * self._l and end[0] > 5 * self._l) \
                or (start[0] < 0 and end[0] < 0):
            # Bug fix: `x % 5 * l` parses as `(x % 5) * l`; the intended
            # wrap-around is modulo the viewer width `5 * l`.
            start[0] = start[0] % (5 * self._l)
            end[0] = end[0] % (5 * self._l)

        self._viewer.line(start, end)
        self._viewer.circle(start, self._r)

        self._viewer.display(self._dt)
class AbstractGridWorld(Environment):
    """
    Abstract class to build a grid world.

    The state is stored as a single integer index; conversion helpers
    map between the integer and the (row, col) grid representation.

    """
    def __init__(self, mdp_info, height, width, start, goal):
        """
        Constructor.

        Args:
            mdp_info (MDPInfo): the information of the MDP;
            height (int): height of the grid;
            width (int): width of the grid;
            start (tuple): x-y coordinates of the start;
            goal (tuple): x-y coordinates of the goal.

        """
        assert not np.array_equal(start, goal)

        assert goal[0] < height and goal[1] < width,\
            'Goal position not suitable for the grid world dimension.'

        self._state = None
        self._height = height
        self._width = width
        self._start = start
        self._goal = goal

        # Visualization
        self._viewer = Viewer(self._width, self._height, 500,
                              self._height * 500 // self._width)

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset to the given integer state, or to the start cell.

        """
        if state is None:
            state = self.convert_to_int(self._start, self._width)

        self._state = state

        return self._state

    def step(self, action):
        """
        Convert the integer state to grid coordinates, delegate the
        transition to the subclass, and convert back.

        """
        state = self.convert_to_grid(self._state, self._width)

        new_state, reward, absorbing, info = self._step(state, action)
        self._state = self.convert_to_int(new_state, self._width)

        return self._state, reward, absorbing, info

    def render(self, mode='human'):
        """
        Draw the grid, the goal (green), the start (red) and the
        current state (blue).

        """
        # Draw each grid line once, instead of once per cell as the
        # previous nested loop did.
        for row in range(1, self._height):
            self._viewer.line(np.array([0, row]),
                              np.array([self._width, row]))
        for col in range(1, self._width):
            self._viewer.line(np.array([col, 0]),
                              np.array([col, self._height]))

        goal_center = np.array(
            [.5 + self._goal[1], self._height - (.5 + self._goal[0])])
        self._viewer.square(goal_center, 0, 1, (0, 255, 0))

        start_grid = self.convert_to_grid(self._start, self._width)
        start_center = np.array(
            [.5 + start_grid[1], self._height - (.5 + start_grid[0])])
        self._viewer.square(start_center, 0, 1, (255, 0, 0))

        state_grid = self.convert_to_grid(self._state, self._width)
        state_center = np.array(
            [.5 + state_grid[1], self._height - (.5 + state_grid[0])])
        self._viewer.circle(state_center, .4, (0, 0, 255))

        self._viewer.display(.1)

    def _step(self, state, action):
        raise NotImplementedError('AbstractGridWorld is an abstract class.')

    def _grid_step(self, state, action):
        """
        Move ``state`` in place by one cell according to ``action``
        (0: up, 1: down, 2: left, 3: right), staying inside the grid.

        """
        if action == 0:
            if state[0] > 0:
                state[0] -= 1
        elif action == 1:
            if state[0] + 1 < self._height:
                state[0] += 1
        elif action == 2:
            if state[1] > 0:
                state[1] -= 1
        elif action == 3:
            if state[1] + 1 < self._width:
                state[1] += 1

    @staticmethod
    def convert_to_grid(state, width):
        # Integer index -> (row, col) coordinates.
        return np.array([state[0] // width, state[0] % width])

    @staticmethod
    def convert_to_int(state, width):
        # (row, col) coordinates -> integer index.
        return np.array([state[0] * width + state[1]])
class InvertedPendulum(Environment):
    """
    The Inverted Pendulum environment (continuous version) as presented in:
    "Reinforcement Learning In Continuous Time and Space". Doya K.. 2000.
    "Off-Policy Actor-Critic". Degris T. et al.. 2012.
    "Deterministic Policy Gradient Algorithms". Silver D. et al. 2014.

    """
    def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2,
                 max_u=5., horizon=5000, gamma=.99):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random
                position or from the horizontal one;
            m (float, 1.0): mass of the pendulum;
            l (float, 1.0): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant of the pendulum;
            max_u (float, 5.0): maximum allowed input torque;
            horizon (int, 5000): horizon of the problem;
            gamma (float, .99): discount factor.

        """
        # MDP parameters
        self._m = m
        self._l = l
        self._g = g
        self._mu = mu
        self._random = random_start
        self._dt = .01
        self._max_u = max_u
        self._max_omega = 5 / 2 * np.pi
        high = np.array([np.pi, self._max_omega])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-max_u]),
                                  high=np.array([max_u]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None

        super().__init__(mdp_info)

    def reset(self, state=None):
        """
        Reset to the given state, or to the horizontal (or random)
        angle with zero velocity.

        """
        if state is not None:
            self._state = state
        else:
            start_angle = np.random.uniform(-np.pi, np.pi) if self._random \
                else np.pi / 2
            self._state = np.array([start_angle, 0.])

        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = self._bound(self._state[1], -self._max_omega,
                                     self._max_omega)
        self._last_u = 0.0

        return self._state

    def step(self, action):
        """
        Apply the bounded torque and integrate for one dt. The reward is
        the cosine of the angle (maximal upright); the task never ends.

        """
        torque = self._bound(action[0], -self._max_u, self._max_u)
        trajectory = odeint(self._dynamics, self._state, [0, self._dt],
                            (torque,))

        self._state = np.array(trajectory[-1])
        self._state[0] = normalize_angle(self._state[0])
        self._state[1] = self._bound(self._state[1], -self._max_omega,
                                     self._max_omega)

        reward = np.cos(self._state[0])

        # Remember the applied torque for rendering the arrow.
        self._last_u = torque.item()

        return self._state, reward, False, {}

    def render(self, mode='human'):
        """
        Draw the pendulum rod, pivot, bob and torque arrow.

        """
        pivot = 1.25 * self._l * np.ones(2)
        tip = pivot + self._l * np.array([np.sin(self._state[0]),
                                          np.cos(self._state[0])])

        self._viewer.line(pivot, tip)
        self._viewer.circle(pivot, self._l / 40)
        self._viewer.circle(tip, self._l / 20)
        self._viewer.torque_arrow(pivot, -self._last_u, self._max_u,
                                  self._l / 5)

        self._viewer.display(self._dt)

    def stop(self):
        self._viewer.close()

    def _dynamics(self, state, t, u):
        """
        Time derivative of (theta, omega) under input torque ``u``.

        """
        theta = state[0]
        omega = self._bound(state[1], -self._max_omega, self._max_omega)

        d_theta = omega
        d_omega = (-self._mu * omega
                   + self._m * self._g * self._l * np.sin(theta)
                   + u) / (self._m * self._l ** 2)

        return d_theta, d_omega