Beispiel #1
0
    def _compute_sensation_(self, name, sensor_window, timestamp_window, index_window):
        """Creates and saves an observation vector based on sensory data.

        For reacher environments the observation vector is a concatenation of:
            - current joint angle positions;
            - current joint angle velocities;
            - diference between current end-effector and target
              Cartesian coordinates;
            - previous action;

        Args:
            name: a string specifying the name of a communicator that
                received given sensory data.
            sensor_window: a list of latest sensory observations stored in
                communicator sensor buffer. the length of list is defined by
                obs_history parameter.
            timestamp_window: a list of latest timestamp values stored in
                communicator buffer.
            index_window: a list of latest sensor index values stored in
                communicator buffer.

        Returns:
            A numpy array containing concatenated [observation, reward, done]
            vector.
        """
        index_end = len(sensor_window)
        index_start = index_end - self._obs_history
        self._q_ = np.array([sensor_window[i]['q_actual'][0] for i in range(index_start,index_end)])
        self._qt_ = np.array([sensor_window[i]['q_target'][0] for i in range(index_start,index_end)])
        self._qd_ = np.array([sensor_window[i]['qd_actual'][0] for i in range(index_start,index_end)])
        self._qdt_ = np.array([sensor_window[i]['qd_target'][0] for i in range(index_start,index_end)])
        self._qddt_ = np.array([sensor_window[i]['qdd_target'][0] for i in range(index_start,index_end)])

        self._current_ = np.array([sensor_window[i]['i_actual'][0] for i in range(index_start,index_end)])
        self._currentt_ = np.array([sensor_window[i]['i_target'][0] for i in range(index_start,index_end)])
        self._currentc_ = np.array([sensor_window[i]['i_control'][0] for i in range(index_start,index_end)])
        self._mt_ = np.array([sensor_window[i]['m_target'][0] for i in range(index_start,index_end)])
        self._voltage_ = np.array([sensor_window[i]['v_actual'][0] for i in range(index_start,index_end)])

        self._safety_mode_ = np.array([sensor_window[i]['safety_mode'][0] for i in range(index_start,index_end)])

        #TODO: should there be checks for safety modes greater than pstop here, and exit if found?

        # Compute end effector position
        x = ur_utils.forward(sensor_window[-1]['q_actual'][0], self._ik_params)[:3, 3]
        np.copyto(self._x_, x)

        if self._target_type == 'position':
            self._target_diff_ = self._x_[self._end_effector_indices] - self._target_
        elif self._target_type == 'angle':
            self._target_diff_ = self._q_[-1, self._joint_indices] - self._target_

        self._reward_.value = self._compute_reward_()
        # TODO: use the correct obs that matches the observation_space
        return np.concatenate((self._q_[:, self._joint_indices].flatten(),
                               self._qd_[:, self._joint_indices].flatten() / self._speed_high,
                               self._target_diff_,
                               self._action_ / self._action_high,
                               [self._reward_.value],
                               [0]))
Beispiel #2
0
    def _check_bound(self, q):
        """Checks whether given arm joints configuration is within box.

        Args:
            q: a numpy array of joints angle positions.

        Returns:
            A tuple (inside_bound, inside_buffer_bound, mat, xyz), where
            inside_bound is a bool specifying whether the arm is within
            safety bounds, inside_buffer_bound is a bool specifying
            whether the arm is within bounds and is at least at buffer
            distance to the closest bound, mat is a 4x4 position matrix
            returned by solving forward kinematics equations, xyz are
            the x,y,z coordinates of an end-effector in a Cartesian space.
        """
        mat = ur_utils.forward(q, self._ik_params)
        xyz = mat[:3, 3]
        inside_bound = np.all(self._end_effector_low <= xyz) and np.all(xyz <= self._end_effector_high)
        inside_buffer_bound = (np.all(self._end_effector_low + self._box_bound_buffer <= xyz) and \
                               np.all(xyz <= self._end_effector_high - self._box_bound_buffer))
        return inside_bound, inside_buffer_bound, mat, xyz