def calibrate(self): if self._calibrated: return print_cbt("Calibrating the cart-pole ...", "c") # Go to the left with completion_context("Going to the left", color="c", bright=True): obs, _, _, _ = self.step(np.zeros(self.act_space.shape)) ctrl = QCartPoleGoToLimCtrl(obs, positive=True) while not ctrl.done: act = ctrl(obs) obs, _, _, _ = self.step(act) if ctrl.success: self._norm_x_lim[1] = obs[0] else: raise RuntimeError("Going to the left limit failed!") # Go to the right with completion_context("Going to the right", color="c", bright=True): obs, _, _, _ = self.step(np.zeros(self.act_space.shape)) ctrl = QCartPoleGoToLimCtrl(obs, positive=False) while not ctrl.done: act = ctrl(obs) obs, _, _, _ = self.step(act) if ctrl.success: self._norm_x_lim[0] = obs[0] else: raise RuntimeError("Going to the right limit failed!") # Activate the absolute cart position: self._calibrated = True
def _wait_for_upright_pole(self): """Waiting until the user manually set the pole upright""" with completion_context("Centering the cart", color="c", bright=True): # Initialize t_max, t_start = 15.0, time.time() upright = False pos_th = np.array([self._c_lim, 2.0 * np.pi / 180.0]) vel_th = 0.1 * np.ones(2) th = np.hstack((pos_th, vel_th)) # Wait until the pole is upright while (time.time() - t_start) <= t_max: obs, _, _, _ = self.step(np.zeros(self.act_space.shape)) time.sleep(1 / 550.0) abs_err = np.abs(np.array([obs[1], obs[2]]) - np.array([[0.0, -1.0]])) err_th = np.array([np.sin(np.deg2rad(3.0)), np.sin(np.deg2rad(3.0))]) if np.all(abs_err <= err_th): upright = True break if not upright: # time.sleep(0.1) state_str = np.array2string( np.abs(obs), suppress_small=True, precision=2, formatter={"float_kind": lambda x: "{0:+05.2f}".format(x)}, ) th_str = np.array2string( th, suppress_small=True, precision=2, formatter={"float_kind": lambda x: "{0:+05.2f}".format(x)} ) raise TimeoutError("The pole is not upright: {0} > {1}".format(state_str, th_str))
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray: if not self._connected: print_cbt('Not connected to Barret WAM client.', 'r', bright=True) raise pyrado.ValueErr(given=self._connected, eq_constraint=True) # Create a direct control process to set the PD gains self._client.set(robcom.Streaming, 500.) # Hz dc = self._client.create(robcom.DirectControl, self._robot_group_name, "") dc.start() dc.groups.set(robcom.JointDesState.P_GAIN, wam_pgains[:self.num_dof].tolist()) dc.groups.set(robcom.JointDesState.D_GAIN, wam_dgains[:self.num_dof].tolist()) dc.send_updates() dc.stop() # Read and print the set gains to confirm that they were set correctly time.sleep( 0.1 ) # short active waiting because updates are sent in another thread pgains_des = self._jg.get_desired(robcom.JointDesState.P_GAIN) dgains_des = self._jg.get_desired(robcom.JointDesState.D_GAIN) print_cbt(f'Desired PD gains are set to: {pgains_des} \t {dgains_des}', color='g') # Create robcom GoTo process gt = self._client.create(robcom.Goto, self._robot_group_name, '') # Move to initial state within 5 seconds gt.add_step(5., self.qpos_des_init) # Start process and wait for completion with completion_context( 'Moving the Barret WAM to the initial position', color='c', bright=True): gt.start() gt.wait_for_completion() # Reset the task which also resets the reward function if necessary self._task.reset(env_spec=self.spec) # Reset time steps self._curr_step = 0 # Reset real WAM trajectory container self.qpos_real = np.zeros((self.max_steps, self.num_dof)) self.qvel_real = np.zeros((self.max_steps, self.num_dof)) # Reset the control process as well as state and trajectory params input('Hit enter to continue.') self._reset() return self.observe(self.state)
def calibrate(self): """Calibration routine to move to the init position and determine the sensor offset""" with completion_context("Estimating sensor offset", color="c", bright=True): # Reset calibration self._sens_offset = np.zeros( 4 ) # last two entries are never calibrated but useful for broadcasting self._wait_for_pole_at_rest() # Create parts of the calibration controller go_right = QQubeGoToLimCtrl(positive=True, cnt_done=int(1.5 / self._dt)) go_left = QQubeGoToLimCtrl(positive=False, cnt_done=int(1.5 / self._dt)) go_center = QQubePDCtrl(self.spec) # Estimate alpha offset. Go to both limits for theta calibration. meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape)) while not go_right.done: meas = self._qsoc.snd_rcv(go_right(to.from_numpy(meas))) while not go_left.done: meas = self._qsoc.snd_rcv(go_left(to.from_numpy(meas))) self._sens_offset[0] = (go_right.th_lim + go_left.th_lim) / 2 # Estimate alpha offset self._wait_for_pole_at_rest() meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape)) self._sens_offset[1] = meas[1] print_cbt( f"Sensor offset: " f"theta = {self._sens_offset[0]*180/np.pi:.3f} deg, " f"alpha = {self._sens_offset[1]*180/np.pi:.3f} deg", "g", ) with completion_context("Centering cube", color="c", bright=True): meas = self._qsoc.snd_rcv(np.zeros(self.act_space.shape)) while not go_center.done: meas = self._qsoc.snd_rcv( go_center(to.from_numpy(meas - self._sens_offset)))
def step(self, act: np.ndarray) -> tuple: if self._curr_step == 0: print_cbt("Pre-sampling policy...", "w") info = dict(act_raw=act.copy()) # Current reward depending on the (measurable) state and the current (unlimited) action remaining_steps = self._max_steps - ( self._curr_step + 1) if self._max_steps is not pyrado.inf else 0 self._curr_rew = self._task.step_rew( self.state, act, remaining_steps) # always 0 for wam-bic-real # Limit the action act = self.limit_act(act) # The policy operates on specific indices self._idcs_act, i.e. joint 1 and 3 (and 5) self.qpos_des[self._curr_step, self._idcs_act] += act[:len(self._idcs_act)] self.qvel_des[self._curr_step, self._idcs_act] += act[len(self._idcs_act):] # Update current step and state self._curr_step += 1 self.state = np.array([self._curr_step / self.max_steps]) # A GoallessTask only signals done when has_failed() is true, i.e. the the state is out of bounds done = self._task.is_done(self.state) # always false for wam-bic-real # Only start execution of process when all desired poses have been sampled from the policy if self._curr_step >= self._max_steps: done = True # exceeded max time steps with completion_context("Executing trajectory on Barret WAM", color="c", bright=True): self._dc.start(False, round(500 * self._dt), self._callback, ["POS", "VEL"], [], []) t_start = time.time() self._dc.wait_for_completion() t_stop = time.time() print_cbt(f"Execution took {t_stop - t_start:1.5f} s.", "g") # Add final reward if done if done: # Ask the user to enter the final reward self._curr_rew += self._task.final_rew(self.state, remaining_steps) # Stop robcom data streaming self._client.set(robcom.Streaming, False) return self.observe(self.state), self._curr_rew, done, info
def reset(self, *args, **kwargs): """ Reset the environment. The base version (re-)opens the socket and resets the task. :param args: just for compatibility with SimEnv. All args can be ignored. :param kwargs: just for compatibility with SimEnv. All kwargs can be ignored. """ # Cancel and re-open the connection to the socket with completion_context("Connecting to Quanser device", color="c"): self._qsoc.close() self._qsoc.open() # Reset the task self._task.reset(env_spec=self.spec)
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray: if not self._connected: print_cbt("Not connected to Barret WAM client.", "r", bright=True) raise pyrado.ValueErr(given=self._connected, eq_constraint=True) # Create a direct control process to set the PD gains self._client.set(robcom.Streaming, 500.0) # Hz dc = self._client.create(robcom.DirectControl, self._robot_group_name, "") dc.start() if self.num_dof == 4: dc.groups.set(robcom.JointDesState.P_GAIN, WAM_PGAINS_4DOF.tolist()) dc.groups.set(robcom.JointDesState.D_GAIN, WAM_DGAINS_4DOF.tolist()) else: dc.groups.set(robcom.JointDesState.P_GAIN, WAM_PGAINS_7DOF.tolist()) dc.groups.set(robcom.JointDesState.D_GAIN, WAM_DGAINS_7DOF.tolist()) dc.send_updates() dc.stop() # Read and print the set gains to confirm that they were set correctly time.sleep(0.1) # short active waiting because updates are sent in another thread pgains_des = self._jg.get_desired(robcom.JointDesState.P_GAIN) dgains_des = self._jg.get_desired(robcom.JointDesState.D_GAIN) print_cbt(f"Desired P-gains have been set to {pgains_des}", color="g") print_cbt(f"Desired D-gains have been set to {dgains_des}", color="g") # Create robcom GoTo process gt = self._client.create(robcom.Goto, self._robot_group_name, "") # Move to initial state within 4 seconds gt.add_step(4.0, self._qpos_des_init) # Start process and wait for completion with completion_context("Moving the Barret WAM to the initial position", color="c", bright=True): gt.start() gt.wait_for_completion() # Reset the task which also resets the reward function if necessary self._task.reset(env_spec=self.spec) # Reset time steps self._curr_step = 0 # Reset trajectory containers self.qpos_real = np.zeros((self.max_steps, self._num_dof)) self.qvel_real = np.zeros((self.max_steps, self._num_dof)) # return self.observe(self.state) return None # TODO
def remove_all_dr_wrappers(env: Env, verbose: bool = False): """ Go through the environment chain and remove all wrappers of type `DomainRandWrapper` (and subclasses). :param env: env chain with domain randomization wrappers :param verbose: choose if status messages should be printed :return: env chain without domain randomization wrappers """ while any(isinstance(subenv, DomainRandWrapper) for subenv in all_envs(env)): if verbose: with completion_context( f"Found domain randomization wrapper of type {type(env).__name__}. Removing it now", color="y", bright=True, ): env = remove_env(env, DomainRandWrapper) else: env = remove_env(env, DomainRandWrapper) return env
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray: # Call WAMReal's reset super().reset(init_state, domain_param) # Get the robot access manager, to control that synchronized data is received self._ram = robcom.RobotAccessManager() # Reset desired positions and velocities self.qpos_des = self._qpos_des_init.copy() self.qvel_des = np.zeros_like(self._qpos_des_init) # Create robcom direct-control process self._dc = self._client.create(robcom.DirectControl, self._robot_group_name, "") # Start NatNet client only once if self.natnet_client.dataSocket is None or self.natnet_client.commandSocket is None: self.natnet_client.run() # If the rigid body tracker is not ready yet, get_current_estimate() will throw an error with completion_context("Initializing rigid body tracker", color="c"): while not self.rigid_body_tracker.initialized(): time.sleep(0.05) # Determine offset for the rigid body tracker (from OptiTrack to MuJoCo) cup_pos_init_sim = CUP_POS_INIT_SIM_4DOF if self._num_dof == 4 else CUP_POS_INIT_SIM_7DOF self.rigid_body_tracker.reset_offset() offset = self.rigid_body_tracker.get_current_estimate( ["Cup"])[0] - cup_pos_init_sim self.rigid_body_tracker.offset = offset # Get current joint state self.state = np.concatenate(self._get_joint_state()) # Set the time for the busy waiting sleep call in step() self._t = time.time() self._cnt_too_slow = 0 input("Hit enter to continue.") return self.observe(self.state)
def _center_cart(self): """Move the cart to the center (x = 0).""" # Initialize t_max, t_start = 10.0, time.time() obs, _, _, _ = self.step(np.zeros(self.act_space.shape)) with completion_context("Centering the cart", color="c", bright=True): while (time.time() - t_start) < t_max: act = -np.sign(obs[0]) * 1.5 * np.ones(self.act_space.shape) obs, _, _, _ = self.step(act) if np.abs(obs[0]) <= self._c_lim / 10.0: break # Stop the cart obs, _, _, _ = self.step(np.zeros(self.act_space.shape)) time.sleep(0.5) if np.abs(obs[0]) > self._c_lim: # time.sleep(0.1) raise RuntimeError(f"Centering of the cart failed: |x| = {np.abs(obs[0]):.2f} > {self._c_lim:.2f}")
def eval_posterior( posterior: DirectPosterior, data_real: to.Tensor, num_samples: int, calculate_log_probs: bool = True, normalize_posterior: bool = True, subrtn_sbi_sampling_hparam: Optional[dict] = None, ) -> Tuple[to.Tensor, Optional[to.Tensor]]: r""" Evaluates the posterior by computing parameter samples given observed data, its log probability and the simulated trajectory. :param posterior: posterior to evaluate, e.g. a normalizing flow, that samples domain parameters conditioned on the provided data :param data_real: data from the real-world rollouts a.k.a. set of $x_o$ of shape [num_iter, num_rollouts_per_iter * dim_feat] :param num_samples: number of samples to draw from the posterior :param calculate_log_probs: if `True`, the log-probabilities are computed, else `None` is returned :param normalize_posterior: if `True`, the normalization of the posterior density is enforced by sbi :param subrtn_sbi_sampling_hparam: keyword arguments forwarded to sbi's `DirectPosterior.sample()` function :return: domain parameters sampled form the posterior of shape [batch_size, num_samples, dim_domain_param], as well as the log-probabilities of these domain parameters """ if not isinstance(data_real, to.Tensor) or data_real.ndim != 2: raise pyrado.ShapeErr( msg= f"The data must be a 2-dim PyTorch tensor, but is of shape {data_real.shape}!" ) batch_size, _ = data_real.shape # Sample domain parameters for all batches and stack them default_sampling_hparam = dict( mcmc_method="slice_np_vectorized", mcmc_parameters=dict(warmup_steps=50, num_chains=100, init_strategy="sir"), # default: slice_np, 20 ) if subrtn_sbi_sampling_hparam is None: subrtn_sbi_sampling_hparam = dict() elif isinstance(subrtn_sbi_sampling_hparam, dict): subrtn_sbi_sampling_hparam = merge_dicts( [default_sampling_hparam, subrtn_sbi_sampling_hparam]) else: raise pyrado.TypeErr(given=subrtn_sbi_sampling_hparam, expected_type=dict) # Sample domain parameters from the posterior domain_params = to.stack( [ posterior.sample( (num_samples, ), x=x_o, **subrtn_sbi_sampling_hparam) for x_o in data_real ], dim=0, ) # Check shape if not domain_params.ndim == 3 or domain_params.shape[:2] != ( batch_size, num_samples): raise pyrado.ShapeErr( msg= f"The sampled domain parameters must be a 3-dim tensor where the 1st dimension is {batch_size} and " f"the 2nd dimension is {num_samples}, but it is of shape {domain_params.shape}!" ) # Compute the log probability if desired if calculate_log_probs: # Batch-wise computation and stacking with completion_context("Evaluating posterior", color="w"): log_probs = to.stack( [ posterior.log_prob( dp, x=x_o, norm_posterior=normalize_posterior) for dp, x_o in zip(domain_params, data_real) ], dim=0, ) # Check shape if log_probs.shape != (batch_size, num_samples): raise pyrado.ShapeErr(given=log_probs, expected_match=(batch_size, num_samples)) else: log_probs = None return domain_params, log_probs
def test_check_prompt(): with completion_context('Works fine', color='g'): a = 3 with pytest.raises(ZeroDivisionError): with completion_context('Works fine', color='r', bright=True): a = 3 / 0
def test_check_prompt(): with completion_context("Works fine", color="g"): a = 3 with pytest.raises(ZeroDivisionError): with completion_context("Works fine", color="r", bright=True): a = 3 / 0