Example #1
    def pack_sample(self, X, U):
        """Packs sample data into Sample object."""
        assert X.shape[0] == self.T
        assert U.shape[0] == self.T
        assert X.shape[1] == self.dX
        assert U.shape[1] == self.dU

        sample = Sample(self)
        for sensor, idx in self._x_data_idx.items():
            sample.set(sensor, X[:, idx])
        for actuator, idx in self._u_data_idx.items():
            sample.set(actuator, U[:, idx])
        sample.set(ACTION, U)
        return sample
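A minimal usage sketch for the method above (the agent object, its dimensions, and the trajectory arrays are assumptions for illustration):

    import numpy as np

    # Full rollouts shaped (T, dX) and (T, dU); pack_sample slices them back
    # into per-sensor and per-actuator channels and stores the action matrix.
    X_traj = np.zeros((agent.T, agent.dX))
    U_traj = np.zeros((agent.T, agent.dU))
    sample = agent.pack_sample(X_traj, U_traj)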
Example #2
def msg_to_sample(ros_msg, agent):
    """Convert a SampleResult ROS message into a Sample Python object."""
    sample = Sample(agent)

    velocity = np.array(ros_msg.velocity).reshape(7)
    joint_angles = np.array(ros_msg.joint_angles).reshape(7)
    ee_pos = np.array(ros_msg.ee_pos).reshape(9)
    ee_jacobians = np.array(ros_msg.ee_points_jacobian,
                            order="F").reshape(9, 7)

    sample.set(JOINT_VELOCITIES, velocity)
    sample.set(JOINT_ANGLES, joint_angles)
    sample.set(END_EFFECTOR_POINTS, ee_pos)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, ee_jacobians)

    return sample
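A hedged usage sketch: this converter would typically run on each received message, e.g. inside a rospy subscriber callback (the topic name, the SampleResult import, and the surrounding agent and list are assumptions):

    import rospy

    def on_sample_result(ros_msg):
        # Convert the ROS message into a Sample bound to this agent.
        samples.append(msg_to_sample(ros_msg, agent))

    rospy.Subscriber('sample_result', SampleResult, on_sample_result)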
Example #3
    def sample(
        self,
        policy,
        condition,
        save=True,
        noisy=True,
        reset_cond=None,
        randomize_initial_state=0,
        **kwargs,
    ):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.
            randomize_initial_state: Perform random steps after resetting to simulate a noisy initial state.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Get a new sample
        sample = Sample(self)

        # Get initial state
        self.env.seed(None if reset_cond is None else self.x0[reset_cond])
        obs = self.env.reset()
        if randomize_initial_state > 0:
            # Take random steps to randomize the initial state distribution
            self.env._set_action(
                (self.env.action_space.high - self.env.action_space.low) / 12 *
                np.random.normal(size=self.dU) * randomize_initial_state)
            for _ in range(5):
                self.sim.step()
            obs = self.env.step(np.zeros(self.dU))[0]

        self.set_states(sample, obs, 0)
        U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise)
        sample.set(ACTION, U_0, 0)
        for t in range(1, self.T):
            if self.render:
                self.env.render(mode='human')

            # Get state
            obs, _, done, _ = self.env.step(sample.get_U(t - 1))
            self.set_states(sample, obs, t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
            sample.set(ACTION, U_t, t)

            if done and t < self.T - 1:
                raise Exception('Iteration ended prematurely %d/%d' %
                                (t + 1, self.T))
        if save:
            self._samples[condition].append(sample)
        return sample
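A sketch of how such a sample() method is usually driven from the outside (agent, policy, and the loop bounds are assumptions; the saved rollouts end up in the agent's per-condition sample lists):

    # Collect several noisy rollouts per condition for a training iteration.
    for cond in range(num_conditions):
        for _ in range(num_samples_per_cond):
            agent.sample(policy, cond, save=True, noisy=True)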
Example #4
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        sample = Sample(self)
        self.reset(reset_cond)

        # Execute policy over a time period of [0,T]
        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            latest_sample = self.get_data()
            for sensor_type in self.x_data_types:
                data = latest_sample.get(sensor_type)
                if self.scaler is not None:
                    data = self.__transform(sensor_type, data)
                sample.set(sensor_type, data, t)

            # Compute site Jacobians
            jac = np.tile(self.jac[:3], (3, 1))
            rotation = sp.spatial.transform.Rotation.from_euler(
                "XYZ", -latest_sample.get(END_EFFECTOR_ROTATIONS))
            for i in range(3):
                rot_ee = rotation.apply(self.ee_points[i])
                for k in range(6):
                    jac[i * 3:(i + 1) * 3,
                        k] += np.cross(self.jac[3:, k], rot_ee)
            sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=t)

            # Use END_EFFECTOR_POINTS as distance to target (scale the target
            # the same way the sensor data was scaled above)
            ee_tgt = self.ee_points_tgt
            if self.scaler is not None:
                ee_tgt = ee_tgt / self.scaler.scale_[-9:]
            sample.set(END_EFFECTOR_POINTS,
                       sample.get(END_EFFECTOR_POINTS, t) - ee_tgt,
                       t=t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
            U_t = np.clip(U_t, -4, 4)

            # Perform action
            self.reset_arm(None, None, U_t, False)
            sample.set(ACTION, U_t, t)

            # Check if agent is keeping up
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical("Agent can't keep up. %fs behind." %
                                 sleep_time)
            elif sleep_time < self.dt / 2:
                logging.warning("Agent may not keep up (%.0f percent busy)" %
                                (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)
        self.reset(reset_cond)
        return sample
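The site-Jacobian block in Example #4 applies the rigid-body identity v_point = v_ee + omega x r: each point's position-Jacobian column is the end effector's translational column plus the angular column crossed with the (rotated) point offset. A standalone numpy illustration of that identity, with all values hypothetical:

    import numpy as np

    jac_full = np.random.rand(6, 7)  # rows 0-2 translational, rows 3-5 angular
    r = np.array([0.1, 0.0, 0.05])   # offset of the point from the end effector
    point_jac = jac_full[:3].copy()
    for k in range(7):
        # Column k maps joint velocity k to (v, omega); shift it to the point.
        point_jac[:, k] += np.cross(jac_full[3:, k], r)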
Example #5
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        sample_ok = False
        while not sample_ok:
            # Get a fresh sample for each attempt so a repeated trial does not
            # reuse data from the rejected one
            sample = Sample(self)
            if not self.debug:
                self.reset(reset_cond)

            self.__init_opcua()

            if noisy:
                noise = generate_noise(self.T, self.dU, self._hyperparams)
            else:
                noise = None

            # Execute policy over a time period of [0,T]
            start = time.time()
            for t in range(self.T):
                # Read sensors and store sensor data in sample
                def store_sensor(sensor):
                    sample.set(sensor, self.read_sensor(sensor), t)

                self.pool.map(store_sensor, self.sensors)
                # Override sensors
                for override in self.sensor_overrides:
                    if override['condition'](t):
                        sensor = override['sensor']
                        sample.set(sensor, override['value'](sample, t), t)

                print('X_%02d' % t, sample.get_X(t))

                # Get action
                U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

                # Override actuators
                for override in self.actuator_overrides:
                    if override['condition'](t):
                        actuator = override['actuator']
                        U_t[self._u_data_idx[actuator]] = np.copy(
                            override['value'])

                # Send signals
                self.send_signals(t)

                # Perform action
                for actuator in self._u_data_idx:
                    self.write_actuator(actuator,
                                        U_t[self._u_data_idx[actuator]])
                sample.set(ACTION, U_t, t)

                print('U_%02d' % t, U_t)

                # Check if agent is keeping up
                sleep_time = start + (t + 1) * self.dt - time.time()
                if sleep_time < 0:
                    logging.critical("Agent can't keep up. %fs behind." %
                                     sleep_time)
                elif sleep_time < self.dt / 2:
                    logging.warning(
                        "Agent may not keep up (%.0f percent busy)" %
                        (((self.dt - sleep_time) / self.dt) * 100))

                # Wait for next timestep
                if sleep_time > 0 and not self.debug:
                    time.sleep(sleep_time)
            self.finalize_sample()

            sample_ok = self.debug or input('Continue? (y/n) ') == 'y'
            if not sample_ok:
                print('Repeating')
        # Store only the accepted sample
        if save:
            self._samples[condition].append(sample)
        return sample
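Example #5 leaves the shape of sensor_overrides and actuator_overrides implicit: each entry pairs a per-timestep 'condition' predicate with a replacement 'value' (a callable for sensors, a fixed array for actuators). A hedged illustration of what such a configuration might look like (all concrete keys and values here are assumptions):

    import numpy as np

    sensor_overrides = [{
        'condition': lambda t: t == 0,            # only at the first timestep
        'sensor': JOINT_ANGLES,                   # sensor type constant
        'value': lambda sample, t: np.zeros(7),   # computed replacement
    }]
    actuator_overrides = [{
        'condition': lambda t: t >= 10,           # from timestep 10 onward
        'actuator': some_actuator_type,           # key into _u_data_idx
        'value': np.zeros(1),                     # fixed command, copied each step
    }]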
Example #6
    def sample(self,
               policy,
               condition,
               save=True,
               noisy=True,
               reset_cond=None,
               **kwargs):
        """Performs agent reset and rolls out given policy to collect a sample.

        Args:
            policy: A Policy object.
            condition: Which condition setup to run.
            save: Whether or not to store the trial into the samples.
            noisy: Whether or not to use noise during sampling.
            reset_cond: The initial condition to reset the agent into.

        Returns:
            sample: A Sample object.

        """
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        sample = Sample(self)
        self.reset(reset_cond)

        # Execute policy over a time period of [0,T]
        # TODO: Find a better solution to change the control mode.
        # Relax the arm so the mode switches to torque control here. If this is
        # not done, the mode changes in timestep t=0, which makes timestep t=1
        # slow because the mutex in the C++ code is still locked.
        self.relax_arm()
        time.sleep(1)

        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            latest_sample = self.get_data()
            for sensor_type in self.x_data_types:
                sample.set(sensor_type, latest_sample.get(sensor_type), t)
            sample.set(END_EFFECTOR_POINT_JACOBIANS,
                       latest_sample.get(END_EFFECTOR_POINT_JACOBIANS),
                       t=t)

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

            # TODO: find better solution to clip (same as in cpp)
            torque_limits_ = np.array([4.0, 4.0, 4.0, 4.0, 1.0, 1.0, .5])
            U_t = np.clip(U_t, -torque_limits_, torque_limits_)

            # Perform action
            self.reset_arm(None, None, U_t, False)
            sample.set(ACTION, U_t, t)

            # Check if agent is keeping up
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical(
                    "Agent can't keep up. In timestep %i it is %fs behind." %
                    (t, sleep_time))
            elif sleep_time < self.dt / 2:
                logging.warning("Agent may not keep up (%.0f percent busy)" %
                                (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)
        self.reset(reset_cond)
        return sample
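The keep-up check shared by Examples #4 to #6 is a fixed-rate control loop: each step's deadline is measured from the loop start, so sleeping the remainder keeps the rate steady without accumulating drift. Reduced to its core (T, dt, and do_work are placeholders):

    import logging
    import time

    start = time.time()
    for t in range(T):
        do_work(t)
        # Deadline for step t is start + (t + 1) * dt.
        sleep_time = start + (t + 1) * dt - time.time()
        if sleep_time < 0:
            logging.critical("Loop is %fs behind schedule.", -sleep_time)
        elif sleep_time > 0:
            time.sleep(sleep_time)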