def _compute_sensation_(self, name, sensor_window, timestamp_window, index_window): """Creates and saves an observation vector based on sensory data. For reacher environments the observation vector is a concatenation of: - current joint angle positions; - current joint angle velocities; - diference between current end-effector and target Cartesian coordinates; - previous action; Args: name: a string specifying the name of a communicator that received given sensory data. sensor_window: a list of latest sensory observations stored in communicator sensor buffer. the length of list is defined by obs_history parameter. timestamp_window: a list of latest timestamp values stored in communicator buffer. index_window: a list of latest sensor index values stored in communicator buffer. Returns: A numpy array containing concatenated [observation, reward, done] vector. """ index_end = len(sensor_window) index_start = index_end - self._obs_history self._q_ = np.array([sensor_window[i]['q_actual'][0] for i in range(index_start,index_end)]) self._qt_ = np.array([sensor_window[i]['q_target'][0] for i in range(index_start,index_end)]) self._qd_ = np.array([sensor_window[i]['qd_actual'][0] for i in range(index_start,index_end)]) self._qdt_ = np.array([sensor_window[i]['qd_target'][0] for i in range(index_start,index_end)]) self._qddt_ = np.array([sensor_window[i]['qdd_target'][0] for i in range(index_start,index_end)]) self._current_ = np.array([sensor_window[i]['i_actual'][0] for i in range(index_start,index_end)]) self._currentt_ = np.array([sensor_window[i]['i_target'][0] for i in range(index_start,index_end)]) self._currentc_ = np.array([sensor_window[i]['i_control'][0] for i in range(index_start,index_end)]) self._mt_ = np.array([sensor_window[i]['m_target'][0] for i in range(index_start,index_end)]) self._voltage_ = np.array([sensor_window[i]['v_actual'][0] for i in range(index_start,index_end)]) self._safety_mode_ = np.array([sensor_window[i]['safety_mode'][0] for i in range(index_start,index_end)]) #TODO: should there be checks for safety modes greater than pstop here, and exit if found? # Compute end effector position x = ur_utils.forward(sensor_window[-1]['q_actual'][0], self._ik_params)[:3, 3] np.copyto(self._x_, x) if self._target_type == 'position': self._target_diff_ = self._x_[self._end_effector_indices] - self._target_ elif self._target_type == 'angle': self._target_diff_ = self._q_[-1, self._joint_indices] - self._target_ self._reward_.value = self._compute_reward_() # TODO: use the correct obs that matches the observation_space return np.concatenate((self._q_[:, self._joint_indices].flatten(), self._qd_[:, self._joint_indices].flatten() / self._speed_high, self._target_diff_, self._action_ / self._action_high, [self._reward_.value], [0]))
def _check_bound(self, q): """Checks whether given arm joints configuration is within box. Args: q: a numpy array of joints angle positions. Returns: A tuple (inside_bound, inside_buffer_bound, mat, xyz), where inside_bound is a bool specifying whether the arm is within safety bounds, inside_buffer_bound is a bool specifying whether the arm is within bounds and is at least at buffer distance to the closest bound, mat is a 4x4 position matrix returned by solving forward kinematics equations, xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] inside_bound = np.all(self._end_effector_low <= xyz) and np.all(xyz <= self._end_effector_high) inside_buffer_bound = (np.all(self._end_effector_low + self._box_bound_buffer <= xyz) and \ np.all(xyz <= self._end_effector_high - self._box_bound_buffer)) return inside_bound, inside_buffer_bound, mat, xyz