예제 #1
0
    def calibrate(self):
        if self._calibrated:
            return
        print_cbt("Calibrating the cart-pole ...", "c")

        # Go to the left
        with completion_context("Going to the left", color="c", bright=True):
            obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
            ctrl = QCartPoleGoToLimCtrl(obs, positive=True)
            while not ctrl.done:
                act = ctrl(obs)
                obs, _, _, _ = self.step(act)

            if ctrl.success:
                self._norm_x_lim[1] = obs[0]
            else:
                raise RuntimeError("Going to the left limit failed!")

        # Go to the right
        with completion_context("Going to the right", color="c", bright=True):
            obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
            ctrl = QCartPoleGoToLimCtrl(obs, positive=False)
            while not ctrl.done:
                act = ctrl(obs)
                obs, _, _, _ = self.step(act)

            if ctrl.success:
                self._norm_x_lim[0] = obs[0]
            else:
                raise RuntimeError("Going to the right limit failed!")

        # Activate the absolute cart position:
        self._calibrated = True
예제 #2
0
    def _wait_for_upright_pole(self):
        """Waiting until the user manually set the pole upright"""
        with completion_context("Centering the cart", color="c", bright=True):
            # Initialize
            t_max, t_start = 15.0, time.time()
            upright = False

            pos_th = np.array([self._c_lim, 2.0 * np.pi / 180.0])
            vel_th = 0.1 * np.ones(2)
            th = np.hstack((pos_th, vel_th))

            # Wait until the pole is upright
            while (time.time() - t_start) <= t_max:
                obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
                time.sleep(1 / 550.0)

                abs_err = np.abs(np.array([obs[1], obs[2]]) - np.array([[0.0, -1.0]]))
                err_th = np.array([np.sin(np.deg2rad(3.0)), np.sin(np.deg2rad(3.0))])

                if np.all(abs_err <= err_th):
                    upright = True
                    break

            if not upright:
                # time.sleep(0.1)
                state_str = np.array2string(
                    np.abs(obs),
                    suppress_small=True,
                    precision=2,
                    formatter={"float_kind": lambda x: "{0:+05.2f}".format(x)},
                )
                th_str = np.array2string(
                    th, suppress_small=True, precision=2, formatter={"float_kind": lambda x: "{0:+05.2f}".format(x)}
                )
                raise TimeoutError("The pole is not upright: {0} > {1}".format(state_str, th_str))
예제 #3
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        if not self._connected:
            print_cbt('Not connected to Barret WAM client.', 'r', bright=True)
            raise pyrado.ValueErr(given=self._connected, eq_constraint=True)

        # Create a direct control process to set the PD gains
        self._client.set(robcom.Streaming, 500.)  # Hz
        dc = self._client.create(robcom.DirectControl, self._robot_group_name,
                                 "")
        dc.start()
        dc.groups.set(robcom.JointDesState.P_GAIN,
                      wam_pgains[:self.num_dof].tolist())
        dc.groups.set(robcom.JointDesState.D_GAIN,
                      wam_dgains[:self.num_dof].tolist())
        dc.send_updates()
        dc.stop()

        # Read and print the set gains to confirm that they were set correctly
        time.sleep(
            0.1
        )  # short active waiting because updates are sent in another thread
        pgains_des = self._jg.get_desired(robcom.JointDesState.P_GAIN)
        dgains_des = self._jg.get_desired(robcom.JointDesState.D_GAIN)
        print_cbt(f'Desired PD gains are set to: {pgains_des} \t {dgains_des}',
                  color='g')

        # Create robcom GoTo process
        gt = self._client.create(robcom.Goto, self._robot_group_name, '')

        # Move to initial state within 5 seconds
        gt.add_step(5., self.qpos_des_init)

        # Start process and wait for completion
        with completion_context(
                'Moving the Barret WAM to the initial position',
                color='c',
                bright=True):
            gt.start()
            gt.wait_for_completion()

        # Reset the task which also resets the reward function if necessary
        self._task.reset(env_spec=self.spec)

        # Reset time steps
        self._curr_step = 0

        # Reset real WAM trajectory container
        self.qpos_real = np.zeros((self.max_steps, self.num_dof))
        self.qvel_real = np.zeros((self.max_steps, self.num_dof))

        # Reset the control process as well as state and trajectory params
        input('Hit enter to continue.')
        self._reset()

        return self.observe(self.state)
예제 #4
0
    def calibrate(self):
        """Calibration routine to move to the init position and determine the sensor offset"""
        with completion_context("Estimating sensor offset",
                                color="c",
                                bright=True):
            # Reset calibration
            self._sens_offset = np.zeros(
                4
            )  # last two entries are never calibrated but useful for broadcasting
            self._wait_for_pole_at_rest()

            # Create parts of the calibration controller
            go_right = QQubeGoToLimCtrl(positive=True,
                                        cnt_done=int(1.5 / self._dt))
            go_left = QQubeGoToLimCtrl(positive=False,
                                       cnt_done=int(1.5 / self._dt))
            go_center = QQubePDCtrl(self.spec)

            # Estimate alpha offset. Go to both limits for theta calibration.
            meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape))
            while not go_right.done:
                meas = self._qsoc.snd_rcv(go_right(to.from_numpy(meas)))
            while not go_left.done:
                meas = self._qsoc.snd_rcv(go_left(to.from_numpy(meas)))
            self._sens_offset[0] = (go_right.th_lim + go_left.th_lim) / 2

            # Estimate alpha offset
            self._wait_for_pole_at_rest()
            meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape))
            self._sens_offset[1] = meas[1]

        print_cbt(
            f"Sensor offset: "
            f"theta = {self._sens_offset[0]*180/np.pi:.3f} deg, "
            f"alpha = {self._sens_offset[1]*180/np.pi:.3f} deg",
            "g",
        )

        with completion_context("Centering cube", color="c", bright=True):
            meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape))
            while not go_center.done:
                meas = self._qsoc.snd_rcv(
                    go_center(to.from_numpy(meas - self._sens_offset)))
예제 #5
0
    def step(self, act: np.ndarray) -> tuple:
        if self._curr_step == 0:
            print_cbt("Pre-sampling policy...", "w")

        info = dict(act_raw=act.copy())

        # Current reward depending on the (measurable) state and the current (unlimited) action
        remaining_steps = self._max_steps - (
            self._curr_step + 1) if self._max_steps is not pyrado.inf else 0
        self._curr_rew = self._task.step_rew(
            self.state, act, remaining_steps)  # always 0 for wam-bic-real

        # Limit the action
        act = self.limit_act(act)

        # The policy operates on specific indices self._idcs_act, i.e. joint 1 and 3 (and 5)
        self.qpos_des[self._curr_step,
                      self._idcs_act] += act[:len(self._idcs_act)]
        self.qvel_des[self._curr_step,
                      self._idcs_act] += act[len(self._idcs_act):]

        # Update current step and state
        self._curr_step += 1
        self.state = np.array([self._curr_step / self.max_steps])

        # A GoallessTask only signals done when has_failed() is true, i.e. the the state is out of bounds
        done = self._task.is_done(self.state)  # always false for wam-bic-real

        # Only start execution of process when all desired poses have been sampled from the policy
        if self._curr_step >= self._max_steps:
            done = True  # exceeded max time steps
            with completion_context("Executing trajectory on Barret WAM",
                                    color="c",
                                    bright=True):
                self._dc.start(False, round(500 * self._dt), self._callback,
                               ["POS", "VEL"], [], [])
                t_start = time.time()
                self._dc.wait_for_completion()
                t_stop = time.time()
            print_cbt(f"Execution took {t_stop - t_start:1.5f} s.", "g")

        # Add final reward if done
        if done:
            # Ask the user to enter the final reward
            self._curr_rew += self._task.final_rew(self.state, remaining_steps)

            # Stop robcom data streaming
            self._client.set(robcom.Streaming, False)

        return self.observe(self.state), self._curr_rew, done, info
예제 #6
0
    def reset(self, *args, **kwargs):
        """
        Reset the environment.
        The base version (re-)opens the socket and resets the task.

        :param args: just for compatibility with SimEnv. All args can be ignored.
        :param kwargs: just for compatibility with SimEnv. All kwargs can be ignored.
        """
        # Cancel and re-open the connection to the socket
        with completion_context("Connecting to Quanser device", color="c"):
            self._qsoc.close()
            self._qsoc.open()

        # Reset the task
        self._task.reset(env_spec=self.spec)
예제 #7
0
    def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
        if not self._connected:
            print_cbt("Not connected to Barret WAM client.", "r", bright=True)
            raise pyrado.ValueErr(given=self._connected, eq_constraint=True)

        # Create a direct control process to set the PD gains
        self._client.set(robcom.Streaming, 500.0)  # Hz
        dc = self._client.create(robcom.DirectControl, self._robot_group_name, "")
        dc.start()
        if self.num_dof == 4:
            dc.groups.set(robcom.JointDesState.P_GAIN, WAM_PGAINS_4DOF.tolist())
            dc.groups.set(robcom.JointDesState.D_GAIN, WAM_DGAINS_4DOF.tolist())
        else:
            dc.groups.set(robcom.JointDesState.P_GAIN, WAM_PGAINS_7DOF.tolist())
            dc.groups.set(robcom.JointDesState.D_GAIN, WAM_DGAINS_7DOF.tolist())
        dc.send_updates()
        dc.stop()

        # Read and print the set gains to confirm that they were set correctly
        time.sleep(0.1)  # short active waiting because updates are sent in another thread
        pgains_des = self._jg.get_desired(robcom.JointDesState.P_GAIN)
        dgains_des = self._jg.get_desired(robcom.JointDesState.D_GAIN)
        print_cbt(f"Desired P-gains have been set to {pgains_des}", color="g")
        print_cbt(f"Desired D-gains have been set to {dgains_des}", color="g")

        # Create robcom GoTo process
        gt = self._client.create(robcom.Goto, self._robot_group_name, "")

        # Move to initial state within 4 seconds
        gt.add_step(4.0, self._qpos_des_init)

        # Start process and wait for completion
        with completion_context("Moving the Barret WAM to the initial position", color="c", bright=True):
            gt.start()
            gt.wait_for_completion()

        # Reset the task which also resets the reward function if necessary
        self._task.reset(env_spec=self.spec)

        # Reset time steps
        self._curr_step = 0

        # Reset trajectory containers
        self.qpos_real = np.zeros((self.max_steps, self._num_dof))
        self.qvel_real = np.zeros((self.max_steps, self._num_dof))

        # return self.observe(self.state)
        return None  # TODO
예제 #8
0
def remove_all_dr_wrappers(env: Env, verbose: bool = False):
    """
    Go through the environment chain and remove all wrappers of type `DomainRandWrapper` (and subclasses).

    :param env: env chain with domain randomization wrappers
    :param verbose: choose if status messages should be printed
    :return: env chain without domain randomization wrappers
    """
    while any(isinstance(subenv, DomainRandWrapper) for subenv in all_envs(env)):
        if verbose:
            with completion_context(
                f"Found domain randomization wrapper of type {type(env).__name__}. Removing it now",
                color="y",
                bright=True,
            ):
                env = remove_env(env, DomainRandWrapper)
        else:
            env = remove_env(env, DomainRandWrapper)
    return env
예제 #9
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Call WAMReal's reset
        super().reset(init_state, domain_param)

        # Get the robot access manager, to control that synchronized data is received
        self._ram = robcom.RobotAccessManager()

        # Reset desired positions and velocities
        self.qpos_des = self._qpos_des_init.copy()
        self.qvel_des = np.zeros_like(self._qpos_des_init)

        # Create robcom direct-control process
        self._dc = self._client.create(robcom.DirectControl,
                                       self._robot_group_name, "")

        # Start NatNet client only once
        if self.natnet_client.dataSocket is None or self.natnet_client.commandSocket is None:
            self.natnet_client.run()

            # If the rigid body tracker is not ready yet, get_current_estimate() will throw an error
            with completion_context("Initializing rigid body tracker",
                                    color="c"):
                while not self.rigid_body_tracker.initialized():
                    time.sleep(0.05)

        # Determine offset for the rigid body tracker (from OptiTrack to MuJoCo)
        cup_pos_init_sim = CUP_POS_INIT_SIM_4DOF if self._num_dof == 4 else CUP_POS_INIT_SIM_7DOF
        self.rigid_body_tracker.reset_offset()
        offset = self.rigid_body_tracker.get_current_estimate(
            ["Cup"])[0] - cup_pos_init_sim
        self.rigid_body_tracker.offset = offset

        # Get current joint state
        self.state = np.concatenate(self._get_joint_state())

        # Set the time for the busy waiting sleep call in step()
        self._t = time.time()
        self._cnt_too_slow = 0

        input("Hit enter to continue.")
        return self.observe(self.state)
예제 #10
0
    def _center_cart(self):
        """Move the cart to the center (x = 0)."""
        # Initialize
        t_max, t_start = 10.0, time.time()
        obs, _, _, _ = self.step(np.zeros(self.act_space.shape))

        with completion_context("Centering the cart", color="c", bright=True):
            while (time.time() - t_start) < t_max:
                act = -np.sign(obs[0]) * 1.5 * np.ones(self.act_space.shape)
                obs, _, _, _ = self.step(act)

                if np.abs(obs[0]) <= self._c_lim / 10.0:
                    break

            # Stop the cart
            obs, _, _, _ = self.step(np.zeros(self.act_space.shape))
            time.sleep(0.5)

            if np.abs(obs[0]) > self._c_lim:
                # time.sleep(0.1)
                raise RuntimeError(f"Centering of the cart failed: |x| = {np.abs(obs[0]):.2f} > {self._c_lim:.2f}")
예제 #11
0
    def eval_posterior(
        posterior: DirectPosterior,
        data_real: to.Tensor,
        num_samples: int,
        calculate_log_probs: bool = True,
        normalize_posterior: bool = True,
        subrtn_sbi_sampling_hparam: Optional[dict] = None,
    ) -> Tuple[to.Tensor, Optional[to.Tensor]]:
        r"""
        Evaluates the posterior by computing parameter samples given observed data, its log probability
        and the simulated trajectory.

        :param posterior: posterior to evaluate, e.g. a normalizing flow, that samples domain parameters conditioned on
                          the provided data
        :param data_real: data from the real-world rollouts a.k.a. set of $x_o$ of shape
                          [num_iter, num_rollouts_per_iter * dim_feat]
        :param num_samples: number of samples to draw from the posterior
        :param calculate_log_probs: if `True`, the log-probabilities are computed, else `None` is returned
        :param normalize_posterior: if `True`, the normalization of the posterior density is enforced by sbi
        :param subrtn_sbi_sampling_hparam: keyword arguments forwarded to sbi's `DirectPosterior.sample()` function
        :return: domain parameters sampled form the posterior of shape [batch_size, num_samples, dim_domain_param], as
                 well as the log-probabilities of these domain parameters
        """
        if not isinstance(data_real, to.Tensor) or data_real.ndim != 2:
            raise pyrado.ShapeErr(
                msg=
                f"The data must be a 2-dim PyTorch tensor, but is of shape {data_real.shape}!"
            )

        batch_size, _ = data_real.shape

        # Sample domain parameters for all batches and stack them
        default_sampling_hparam = dict(
            mcmc_method="slice_np_vectorized",
            mcmc_parameters=dict(warmup_steps=50,
                                 num_chains=100,
                                 init_strategy="sir"),  # default: slice_np, 20
        )
        if subrtn_sbi_sampling_hparam is None:
            subrtn_sbi_sampling_hparam = dict()
        elif isinstance(subrtn_sbi_sampling_hparam, dict):
            subrtn_sbi_sampling_hparam = merge_dicts(
                [default_sampling_hparam, subrtn_sbi_sampling_hparam])
        else:
            raise pyrado.TypeErr(given=subrtn_sbi_sampling_hparam,
                                 expected_type=dict)

        # Sample domain parameters from the posterior
        domain_params = to.stack(
            [
                posterior.sample(
                    (num_samples, ), x=x_o, **subrtn_sbi_sampling_hparam)
                for x_o in data_real
            ],
            dim=0,
        )

        # Check shape
        if not domain_params.ndim == 3 or domain_params.shape[:2] != (
                batch_size, num_samples):
            raise pyrado.ShapeErr(
                msg=
                f"The sampled domain parameters must be a 3-dim tensor where the 1st dimension is {batch_size} and "
                f"the 2nd dimension is {num_samples}, but it is of shape {domain_params.shape}!"
            )

        # Compute the log probability if desired
        if calculate_log_probs:
            # Batch-wise computation and stacking
            with completion_context("Evaluating posterior", color="w"):
                log_probs = to.stack(
                    [
                        posterior.log_prob(
                            dp, x=x_o, norm_posterior=normalize_posterior)
                        for dp, x_o in zip(domain_params, data_real)
                    ],
                    dim=0,
                )

            # Check shape
            if log_probs.shape != (batch_size, num_samples):
                raise pyrado.ShapeErr(given=log_probs,
                                      expected_match=(batch_size, num_samples))

        else:
            log_probs = None

        return domain_params, log_probs
예제 #12
0
def test_check_prompt():
    with completion_context('Works fine', color='g'):
        a = 3
    with pytest.raises(ZeroDivisionError):
        with completion_context('Works fine', color='r', bright=True):
            a = 3 / 0
예제 #13
0
def test_check_prompt():
    with completion_context("Works fine", color="g"):
        a = 3
    with pytest.raises(ZeroDivisionError):
        with completion_context("Works fine", color="r", bright=True):
            a = 3 / 0