Esempio n. 1
0
class RosenSim(SimEnv, Serializable):
    """ This environment wraps the Rosenbrock function to use it as a test case for Pyrado algorithms. """

    name: str = 'rosen'

    def __init__(self):
        """ Constructor """
        Serializable._init(self, locals())

        # Initialize basic variables
        super().__init__(dt=None, max_steps=1)

        # Set the bounds for the system's states adn actions
        max_state = np.array([100., 100.])
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=['x_1', 'x_2'])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=['x_1_init', 'x_2_init'])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=['x_1_next', 'x_2_next'])

        # Define the task including the reward function
        self._task = self._create_task()

        # Animation with pyplot
        self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict = None) -> OptimProxyTask:
        return OptimProxyTask(self.spec,
                              StateBasedRewFcn(rosenbrock, flip_sign=True))

    @property
    def task(self) -> OptimProxyTask:
        return self._task

    @property
    def domain_param(self):
        return {}

    @domain_param.setter
    def domain_param(self, param):
        pass

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return {}

    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            self.state = self._init_space.sample_uniform()  # zero
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state,
                                      expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.array(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state,
                                         expected_type=[np.ndarray, list])

        # No need to reset the task

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act):
        # Apply actuator limits
        act = self.limit_act(act)

        # Action equal selection a new state a.k.a. solution of the optimization problem
        self.state = act

        # Current reward depending on the state after the step (since there is only one step)
        self._curr_rew = self.task.step_rew(self.state)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self.task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print(
                    "step: {:3}  |  r_t: {: 1.3f}  |  a_t: {}\t |  s_t+1: {}".
                    format(self._curr_step, self._curr_rew, self._curr_act,
                           self.state))

        # Render using pyplot
        if mode.video:
            from matplotlib import pyplot as plt
            from pyrado.plotting.surface import draw_surface

            plt.ion()

            if self._anim['fig'] is None:
                # Plot Rosenbrock function once if not already plotted
                x = np.linspace(-2, 2, 20, True)
                y = np.linspace(-1, 3, 20, True)
                self._anim['fig'] = draw_surface(x, y, rosenbrock, 'x', 'y',
                                                 'z')

            self._anim['trace_x'].append(self.state[0])
            self._anim['trace_y'].append(self.state[1])
            self._anim['trace_z'].append(rosenbrock(self.state))

            ax = self._anim['fig'].gca()
            ax.scatter(self._anim['trace_x'],
                       self._anim['trace_y'],
                       self._anim['trace_z'],
                       s=8,
                       c='w')

            plt.draw()
Esempio n. 2
0
class CatapultSim(SimEnv, Serializable):
    """
    In this special environment, the action is equal to the policy parameter. Therefore, it makes only sense to
    use it in combination with a linear policy that has only one constant feature.
    """

    name: str = "cata"

    def __init__(self, max_steps: int, example_config: bool):
        """
        Constructor

        :param max_steps: maximum number of simulation steps
        :param example_config: configuration for the 'illustrative example' in the journal
        """
        Serializable._init(self, locals())

        super().__init__(dt=1, max_steps=max_steps)

        self.example_config = example_config
        self._planet = -1

        # Initialize the domain parameters (Earth)
        self._g = CatapultSim.get_nominal_domain_param()[
            "gravity_const"]  # planet's gravity constant [m/s**2]
        self._k = CatapultSim.get_nominal_domain_param()[
            "stiffness"]  # catapult spring's stiffness constant [N/m]
        self._x = CatapultSim.get_nominal_domain_param()[
            "elongation"]  # catapult spring's pre-elongation [m]

        # Domain independent parameter
        self._m = 70.0  # victim's mass [kg]

        # Set the bounds for the system's states adn actions
        max_state = np.array([1000.0])  # [m], arbitrary but >> self._x
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=["h"])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=["h_0"])
        self._act_space = BoxSpace(-max_act, max_act, labels=["theta"])

        # Define the task including the reward function
        self._task = self._create_task(task_args=dict())

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict) -> DesStateTask:
        # Define the task including the reward function
        state_des = task_args.get("state_des",
                                  np.zeros(self._state_space.shape))
        return DesStateTask(self.spec,
                            state_des,
                            rew_fcn=AbsErrRewFcn(q=np.array([1.0]),
                                                 r=np.array([0.0])))

    @property
    def task(self):
        return self._task

    @property
    def domain_param(self) -> dict:
        if self.example_config:
            return dict(planet=self._planet)
        else:
            return dict(gravity_const=self._g, k=self._k, x=self._x)

    @domain_param.setter
    def domain_param(self, domain_param: dict):
        assert isinstance(domain_param, dict)
        # Set the new domain params if given, else the default value
        if self.example_config:
            if domain_param["planet"] == 0:
                # Mars
                self._g = 3.71
                self._k = 1e3
                self._x = 0.5
            elif domain_param["planet"] == 1:
                # Venus
                self._g = 8.87
                self._k = 3e3
                self._x = 1.5
            elif domain_param["planet"] == -1:
                # Default value which should make the computation invalid
                self._g = None
                self._k = None
                self._x = None
            else:
                raise pyrado.ValueErr(given=domain_param["planet"],
                                      eq_constraint="0 or 1")

        else:
            assert self._g > 0 and self._k > 0 and self._x > 0
            self._g = domain_param.get("gravity_const", self._g)
            self._k = domain_param.get("stiffness", self._k)
            self._x = domain_param.get("elongation", self._x)

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return dict(gravity_const=9.81, stiffness=2000.0, elongation=1.0)

    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            # Sample from the init state space
            self.state = self._init_space.sample_uniform()
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state,
                                      expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.asarray(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state,
                                         expected_type=np.ndarray)

        # Reset the task
        self._task.reset(env_spec=self.spec)

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act: np.ndarray) -> tuple:
        # Apply actuator limits
        act = self.limit_act(act)  # dummy for CatapultSim
        self._curr_act = act  # just for the render function

        # Calculate the maximum height of the flight trajectory ("one step dynamics")
        self.state = self._k / (2.0 * self._m *
                                self._g) * (act - self._x)**2  # h(theta, xi)

        # Current reward depending on the state after the step (since there is only one step) and the (unlimited) action
        self._curr_rew = self.task.step_rew(self.state, act, self._curr_step)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self._task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        if done:
            # Add final reward if done
            remaining_steps = self._max_steps - (
                self._curr_step +
                1) if self._max_steps is not pyrado.inf else 0
            self._curr_rew += self._task.final_rew(self.state, remaining_steps)

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print(
                    f"step: {self._curr_step:4d}  |  r_t: {self._curr_rew: 1.3f}  |  a_t: {self._curr_act}  |  s_t+1: {self.state}"
                )