def pack_sample(self, X, U):
    """Packs sample data into a Sample object."""
    assert X.shape[0] == self.T
    assert U.shape[0] == self.T
    assert X.shape[1] == self.dX
    assert U.shape[1] == self.dU

    sample = Sample(self)
    for sensor, idx in self._x_data_idx.items():
        sample.set(sensor, X[:, idx])
    for actuator, idx in self._u_data_idx.items():
        sample.set(actuator, U[:, idx])
    sample.set(ACTION, U)
    return sample
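# A minimal usage sketch for pack_sample (hypothetical; assumes an existing
# `agent` instance whose T, dX and dU match the arrays below):
#
#   X = np.zeros((agent.T, agent.dX))  # state trajectory, one row per timestep
#   U = np.zeros((agent.T, agent.dU))  # action trajectory
#   sample = agent.pack_sample(X, U)
#   assert np.allclose(sample.get_X(), X)
#   assert np.allclose(sample.get_U(), U)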
def msg_to_sample(ros_msg, agent):
    """Convert a SampleResult ROS message into a Sample Python object."""
    sample = Sample(agent)

    velocity = np.array(ros_msg.velocity).reshape(7)
    joint_angles = np.array(ros_msg.joint_angles).reshape(7)
    ee_pos = np.array(ros_msg.ee_pos).reshape(9)
    # The Jacobian arrives flattened column-major, so order="F" must be passed
    # to reshape; passing it to np.array on a 1-D input has no effect.
    ee_jacobians = np.array(ros_msg.ee_points_jacobian).reshape((9, 7), order="F")

    sample.set(JOINT_VELOCITIES, velocity)
    sample.set(JOINT_ANGLES, joint_angles)
    sample.set(END_EFFECTOR_POINTS, ee_pos)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, ee_jacobians)
    return sample
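# A standalone numpy check of why the reshape order matters (illustrative only):
#
#   >>> flat = np.arange(6)
#   >>> flat.reshape((2, 3))             # C order (row-major)
#   array([[0, 1, 2],
#          [3, 4, 5]])
#   >>> flat.reshape((2, 3), order="F")  # Fortran order (column-major)
#   array([[0, 2, 4],
#          [1, 3, 5]])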
def sample(
    self,
    policy,
    condition,
    save=True,
    noisy=True,
    reset_cond=None,
    randomize_initial_state=0,
    **kwargs,
):
    """Performs agent reset and rolls out given policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.
        randomize_initial_state: Perform random steps after resetting to
            simulate a noisy initial state.

    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Get a new sample
    sample = Sample(self)

    # Get initial state
    self.env.seed(None if reset_cond is None else self.x0[reset_cond])
    obs = self.env.reset()

    if randomize_initial_state > 0:
        # Take random steps to randomize the initial state distribution
        self.env._set_action(
            (self.env.action_space.high - self.env.action_space.low) / 12 *
            np.random.normal(size=self.dU) * randomize_initial_state)
        for _ in range(5):
            self.sim.step()
        obs = self.env.step(np.zeros(self.dU))[0]

    self.set_states(sample, obs, 0)
    U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise)
    sample.set(ACTION, U_0, 0)
    for t in range(1, self.T):
        if self.render:
            self.env.render(mode='human')

        # Get state
        obs, _, done, _ = self.env.step(sample.get_U(t - 1))
        self.set_states(sample, obs, t)

        # Get action
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
        sample.set(ACTION, U_t, t)

        if done and t < self.T - 1:
            raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))
    if save:
        self._samples[condition].append(sample)
    return sample
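# Sketch of a typical call site for this rollout (names are illustrative,
# not defined in this file):
#
#   sample = agent.sample(policy, condition=0, noisy=True, reset_cond=0,
#                         randomize_initial_state=1)
#   X = sample.get_X()  # (T, dX) states visited during the rollout
#   U = sample.get_U()  # (T, dU) actions taken by the policy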
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.

    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    sample = Sample(self)
    self.reset(reset_cond)

    # Execute policy over a time period of [0, T]
    start = time.time()
    for t in range(self.T):
        # Read sensors and store sensor data in sample
        latest_sample = self.get_data()
        for sensor_type in self.x_data_types:
            data = latest_sample.get(sensor_type)
            if self.scaler is not None:
                data = self.__transform(sensor_type, data)
            sample.set(sensor_type, data, t)

        # Compute site Jacobians: each end-effector point moves with
        # v_point = v_ee + omega x r, so every angular column contributes
        # a cross product with the rotated point offset.
        jac = np.tile(self.jac[:3], (3, 1))
        rotation = sp.spatial.transform.Rotation.from_euler(
            "XYZ", -latest_sample.get(END_EFFECTOR_ROTATIONS))
        for i in range(3):
            rot_ee = rotation.apply(self.ee_points[i])
            for k in range(6):
                jac[i * 3:(i + 1) * 3, k] += np.cross(self.jac[3:, k], rot_ee)
        sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=t)

        # Use END_EFFECTOR_POINTS as distance to target; rescale the target
        # only when a scaler is in use, so scaled and raw data stay consistent.
        ee_tgt = self.ee_points_tgt
        if self.scaler is not None:
            ee_tgt = ee_tgt / self.scaler.scale_[-9:]
        sample.set(END_EFFECTOR_POINTS,
                   sample.get(END_EFFECTOR_POINTS, t) - ee_tgt,
                   t=t)

        # Get action
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
        U_t = np.clip(U_t, -4, 4)

        # Perform action
        self.reset_arm(None, None, U_t, False)
        sample.set(ACTION, U_t, t)

        # Check if agent is keeping up
        sleep_time = start + (t + 1) * self.dt - time.time()
        if sleep_time < 0:
            logging.critical("Agent can't keep up. %fs behind." % sleep_time)
        elif sleep_time < self.dt / 2:
            logging.warning("Agent may not keep up (%.0f percent busy)" %
                            (((self.dt - sleep_time) / self.dt) * 100))

        # Wait for next timestep
        if sleep_time > 0:
            time.sleep(sleep_time)
    if save:
        self._samples[condition].append(sample)
    self.reset(reset_cond)
    return sample
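# The site-Jacobian loop above applies the rigid-body identity
# v_point = v_ee + omega x r column by column. The same transform as a
# self-contained sketch (assumed shapes: a (6, n) spatial Jacobian with
# linear rows first, and a point offset r already expressed in the base frame):
#
#   def point_jacobian(jac_spatial, r):
#       jac_pos, jac_rot = jac_spatial[:3], jac_spatial[3:]
#       # omega_k x r for every joint k, as one vectorized cross product
#       return jac_pos + np.cross(jac_rot.T, r).T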
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.

    Returns:
        sample: A Sample object.
    """
    # Get a new sample
    sample = Sample(self)

    sample_ok = False
    while not sample_ok:
        if not self.debug:
            self.reset(reset_cond)

        self.__init_opcua()

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = None

        # Execute policy over a time period of [0, T]
        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            def store_sensor(sensor):
                sample.set(sensor, self.read_sensor(sensor), t)

            self.pool.map(store_sensor, self.sensors)

            # Override sensors
            for override in self.sensor_overrides:
                if override['condition'](t):
                    sensor = override['sensor']
                    sample.set(sensor, override['value'](sample, t), t)

            print('X_%02d' % t, sample.get_X(t))

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

            # Override actuators
            for override in self.actuator_overrides:
                if override['condition'](t):
                    actuator = override['actuator']
                    U_t[self._u_data_idx[actuator]] = np.copy(override['value'])

            # Send signals
            self.send_signals(t)

            # Perform action
            for actuator in self._u_data_idx:
                self.write_actuator(actuator, U_t[self._u_data_idx[actuator]])
            sample.set(ACTION, U_t, t)

            print('U_%02d' % t, U_t)

            # Check if agent is keeping up
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical("Agent can't keep up. %fs behind." % sleep_time)
            elif sleep_time < self.dt / 2:
                logging.warning("Agent may not keep up (%.0f percent busy)" %
                                (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0 and not self.debug:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)

        self.finalize_sample()

        sample_ok = self.debug or input('Continue?') == 'y'
        if not sample_ok:
            print('Repeating')
    return sample
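# The overrides consumed above are plain dicts, presumably supplied via the
# hyperparams. A hedged example of the expected shape (keys inferred from the
# loops above; sensor/actuator names and dimensions are placeholders):
#
#   sensor_overrides = [{
#       'condition': lambda t: t < 5,             # when to apply the override
#       'sensor': JOINT_VELOCITIES,               # which channel to replace
#       'value': lambda sample, t: np.zeros(7),   # replacement data
#   }]
#   actuator_overrides = [{
#       'condition': lambda t: t == 0,
#       'actuator': SOME_ACTUATOR,                # placeholder name
#       'value': np.zeros(1),                     # copied into U_t at its index
#   }]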
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.

    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    sample = Sample(self)
    self.reset(reset_cond)

    # Execute policy over a time period of [0, T].
    # TODO: Find a better solution to change the mode.
    # Relax the arm now to switch it into torque mode. If this is not done,
    # the mode changes in timestep t=0, making timestep t=1 slow because the
    # mutex in the C++ controller is still locked.
    self.relax_arm()
    time.sleep(1)

    start = time.time()
    for t in range(self.T):
        # Read sensors and store sensor data in sample
        latest_sample = self.get_data()
        for sensor_type in self.x_data_types:
            sample.set(sensor_type, latest_sample.get(sensor_type), t)
        sample.set(END_EFFECTOR_POINT_JACOBIANS,
                   latest_sample.get(END_EFFECTOR_POINT_JACOBIANS),
                   t=t)

        # Get action
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

        # TODO: Find a better solution to clip (same limits as in the C++ controller)
        torque_limits_ = np.array([4.0, 4.0, 4.0, 4.0, 1.0, 1.0, .5])
        U_t = np.clip(U_t, -torque_limits_, torque_limits_)

        # Perform action
        self.reset_arm(None, None, U_t, False)
        sample.set(ACTION, U_t, t)

        # Check if agent is keeping up
        sleep_time = start + (t + 1) * self.dt - time.time()
        if sleep_time < 0:
            logging.critical("Agent can't keep up. In timestep %i it is %fs behind." %
                             (t, sleep_time))
        elif sleep_time < self.dt / 2:
            logging.warning("Agent may not keep up (%.0f percent busy)" %
                            (((self.dt - sleep_time) / self.dt) * 100))

        # Wait for next timestep
        if sleep_time > 0:
            time.sleep(sleep_time)
    if save:
        self._samples[condition].append(sample)
    self.reset(reset_cond)
    return sample