def sample(self, policy, condition, verbose=False, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition (int): Which condition setup to run.
        verbose (boolean): Whether or not to plot the trial (not used here).
        save (boolean): Whether or not to store the trial into the samples.
        noisy (boolean): Whether or not to use noise during sampling.
    Returns:
        The sample that was filled in during the trial.
    """
    world = self._worlds[condition]
    world.run()
    world.reset_world()
    b2d_X = world.get_state()
    new_sample = self._init_sample(b2d_X)
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
        # The last action is recorded but the world is not stepped again.
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                world.run_next(U[t, :])
            b2d_X = world.get_state()
            self._set_sample(new_sample, b2d_X, t)
    new_sample.set(ACTION, U)
    if save:
        self._samples[condition].append(new_sample)
    # Previously the sample was built but never handed back to the caller.
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        verbose: Whether or not to plot the trial.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The sample that was filled in during the trial.
    """
    # NOTE(review): mj_X is the stored x0 entry itself, so the in-place
    # `+=` below also alters self._hyperparams['x0'][condition] when that
    # entry is a NumPy array. Kept as-is because _init_sample runs after it.
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state. The variance is looked up per condition
    # (matching the guard) and randn receives the shape unpacked, since it
    # takes the dimensions as separate integers rather than a tuple.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        x0n = x0var * np.random.randn(*np.shape(x0var))
        mj_X += x0n

    # Perturb the positions of the bodies listed as noisy.
    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)
    self._world[condition].set_model(self._model[condition])

    if self._linear:
        # Fixed linear map applied to the stacked [state, action] vector.
        dt = self._hyperparams['dt']
        F = np.array([[1, 0, dt, 0, dt**2., 0],
                      [0, 1, 0, dt, 0, dt**2.],
                      [0, 0, 1, 0, dt, 0],
                      [0, 0, 0, 1, 0, dt]])

    # Create new sample, populate first time step.
    new_sample = self._init_sample(condition)
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])
        U[t, :] = mj_U
        if verbose:
            self._world[condition].plot(mj_X)
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                if self._linear:
                    mj_X = F.dot(np.r_[mj_X, mj_U])
                else:
                    mj_X, _ = self._world[condition].step(mj_X, mj_U)
            # TODO: Some hidden state stuff will go here.
            self._data = self._world[condition].get_data()
            self._set_sample(new_sample, mj_X, t, condition)
    new_sample.set(ACTION, U)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset the agent, run one trial with the given policy and turn the
    reply into a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # A non-None TfPolicy means the user has tf installed.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    self.reset(condition)

    # Generate noise.
    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))

    # Assemble the trial command.
    command = TrialCommand()
    command.id = self._get_next_seq_id()
    command.controller = policy_to_msg(policy, noise)
    command.T = self.T
    # NOTE(review): a second sequence id is drawn and overwrites the first.
    command.id = self._get_next_seq_id()
    command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    command.ee_points = ee_points.reshape(ee_points.size).tolist()
    command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    command.state_datatypes = self._hyperparams['state_include']
    command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial: either wait on the trial service, or publish and run
    # the trial through run_trial_tf.
    if self.use_tf is False:
        reply = self._trial_service.publish_and_wait(
            command, timeout=self._hyperparams['trial_timeout']
        )
    else:
        self._trial_service.publish(command)
        reply = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(reply, self)
    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        verbose: Whether or not to plot the trial.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The sample that was filled in during the trial.
    """
    # Create new sample, populate first time step. A policy may expose
    # `get_features`; if so it is forwarded to the sample helpers.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state. The variance is looked up per condition
    # (matching the guard) and randn receives the shape unpacked, since it
    # takes the dimensions as separate integers rather than a tuple.
    # NOTE(review): `+=` modifies the stored x0 entry in place when it is
    # a NumPy array.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        x0n = x0var * np.random.randn(*np.shape(x0var))
        mj_X += x0n

    # Perturb the positions of the bodies listed as noisy.
    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)

    # Take the sample.
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])
        U[t, :] = mj_U
        if verbose:
            self._world[condition].plot(mj_X)
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                mj_X, _ = self._world[condition].step(mj_X, mj_U)
            self._data = self._world[condition].get_data()
            self._set_sample(new_sample, mj_X, t, condition,
                             feature_fn=feature_fn)
    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def execute(self, policy):
    """
    Execute a policy and collect a sample.

    Unlike `sample`, no reset is performed here and the end-effector
    target of condition 0 is always used.

    Args:
        policy: A Policy object.
    Returns:
        sample: A Sample object.
    """
    # A non-None TfPolicy means the user has tf installed.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    # Generate noise.
    noise = generate_noise(self.T, self.dU, self._hyperparams)

    # Execute trial.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    # NOTE(review): a second sequence id is drawn and overwrites the first.
    trial_command.id = self._get_next_seq_id()
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    # TODO: obtain the target from self._tgt_subscribe.subscribe_and_wait()
    # instead of the fixed condition-0 target.
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][0].tolist()
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout']
        )
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    # The sample was previously built and then dropped; return it as the
    # docstring promises.
    return sample
def sample(self, policy, condition, verbose=True, save=True):
    """
    Reset the agent, run one noisy trial with the given policy and turn
    the reply into a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
    Returns:
        sample: A Sample object.
    """
    self.reset(condition)

    # Exploration noise is always generated for this agent.
    exploration = generate_noise(self.T, self.dU, self._hyperparams)

    # Assemble the trial command.
    command = TrialCommand()
    command.id = self._get_next_seq_id()
    command.controller = policy_to_msg(policy, exploration)
    command.T = self.T
    # NOTE(review): a second sequence id is drawn and overwrites the first.
    command.id = self._get_next_seq_id()
    command.frequency = self._hyperparams['frequency']
    points = self._hyperparams['end_effector_points']
    command.ee_points = points.reshape(points.size).tolist()
    target = self._hyperparams['ee_points_tgt'][condition]
    command.ee_points_tgt = target.tolist()
    command.state_datatypes = self._hyperparams['state_include']
    command.obs_datatypes = self._hyperparams['state_include']

    # Execute trial and wait for the result message.
    reply = self._trial_service.publish_and_wait(
        command, timeout=self._hyperparams['trial_timeout']
    )

    sample = msg_to_sample(reply, self)
    if save:
        self._samples[condition].append(sample)
    return sample
class AgentBusPol(Agent):
    """
    All communication between the algorithms and Vehicle is done through
    this class.
    """

    def __init__(self, hyperparams):
        """
        Merge `hyperparams` over a copy of AGENT_BUS, initialise the base
        Agent, and build one world per condition.
        """
        config = deepcopy(AGENT_BUS)
        config.update(hyperparams)
        Agent.__init__(self, config)

        self._setup_conditions()
        # Bookkeeping written by sample(): first/last time step at which
        # the world reported `reach`, and the derived finishing flag/time.
        self.reach_start = None
        self.reach_end = None
        self.finishing = None
        self.finishing_time = None
        self._setup_world(self._hyperparams["world"],
                          self._hyperparams["target_state"],
                          self._hyperparams["render"],
                          self._hyperparams["polygons"],
                          self._hyperparams["map_size"],
                          self._hyperparams["map_state"],
                          self._hyperparams["display_center"],)

    def _setup_conditions(self):
        """
        Helper method for setting some hyperparameters that may vary by
        condition.
        """
        conds = self._hyperparams['conditions']
        # Only 'x0' is expanded here; the wider field list is disabled:
        # for field in ('x0', 'x0var', 'pos_body_idx', 'pos_body_offset',
        #               'noisy_body_idx', 'noisy_body_var', 'filename'):
        #     self._hyperparams[field] = setup(self._hyperparams[field], conds)
        self._hyperparams['x0'] = setup(self._hyperparams['x0'], conds)

    def _setup_world(self, world, target_state, render, polygons, map_size, map_state, display_center):
        """
        Helper method for handling setup of the Box2D world.

        Instantiates `world` once per condition, each with that
        condition's initial state.
        """
        self.x0 = self._hyperparams["x0"]  # initial state
        self._worlds = [world(self.x0[i], target_state, render, map_size, polygons=polygons, map_state=map_state, display_center=display_center)
                        for i in range(self._hyperparams['conditions'])]

    def sample(self, policy, condition, verbose=False, save=True, noisy=True):
        """
        Runs a trial and constructs a new sample containing information
        about the trial.

        Args:
            policy: Policy to be used in the trial.
            condition (int): Which condition setup to run.
            verbose (boolean): Whether or not to plot the trial (not used here).
            save (boolean): Whether or not to store the trial into the samples.
            noisy (boolean): Whether or not to use noise during sampling.
        Returns:
            The sample that was filled in during the trial.
        """
        # Modified on April 2, referring to agent_mjc
        # The world is no longer run/reset here; the sample starts from
        # whatever state the world currently reports.
        # self._worlds[condition].run()
        # self._worlds[condition].reset_world()
        # NOTE(review): feature_fn is computed but not used below.
        feature_fn = None
        if 'get_features' in dir(policy):
            feature_fn = policy.get_features
        b2d_X = self._worlds[condition].get_state()
        new_sample = self._init_sample(b2d_X)
        # initialize a dummy action sequence
        U = np.zeros([self.T, self.dU])
        # NOTE(review): reach_start/reach_end are cleared per trial, but
        # finishing/finishing_time are not reset here.
        self.reach_start = None
        self.reach_end = None
        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))
        for t in range(self.T):
            X_t = new_sample.get_X(t=t)
            obs_t = new_sample.get_obs(t=t)
            U[t, :] = policy.act(X_t, obs_t, t, noise[t, :])
            # print(U[t])
            if (t+1) < self.T:
                for _ in range(self._hyperparams['substeps']):
                    self._worlds[condition].run_next(U[t, :])
                # NOTE(review): the nesting of the branches below was
                # reconstructed from a whitespace-mangled source; confirm
                # against the original file.
                if self._worlds[condition].reach:
                    # Consume the world's reach flag for this step.
                    self._worlds[condition].reach = False
                    if self.reach_start == None:
                        self.reach_start = t
                        # print("reach_start", t)
                    elif self.reach_end == None or t>self.reach_end:
                        self.reach_end = t
                        if t==self.T-2:  # continue reaching till the end of series
                            # print("reach_end", self.reach_end)
                            period = self.reach_end - self.reach_start
                            # print("reach period", period)
                            if period > 3:
                                self.finishing = True
                                # self.finishing_time = self.reach_end
                                self.finishing_time = self.reach_start
                elif self.reach_end == t-1 :  # just leave
                    # print("reach_end", self.reach_end)
                    period = self.reach_end - self.reach_start
                    # print("reach period", period)
                    if period > 1:
                        self.finishing = True
                        # self.finishing_time = self.reach_end
                        self.finishing_time = self.reach_start
                        # A finishing time of 0 is bumped to 1.
                        if self.finishing_time == 0:
                            self.finishing_time = 1
                b2d_X = self._worlds[condition].get_state()
                self._set_sample(new_sample, b2d_X, t)
        new_sample.set(ACTION, U)
        if save:
            self._samples[condition].append(new_sample)
        # if self.finishing:
        #     print("agent_bus t= ", self.finishing_time)
        return new_sample
def sample(
    self,
    policy,
    condition,
    save=True,
    noisy=True,
    reset_cond=None,
    randomize_initial_state=0,
    **kwargs,
):
    """Resets the environment and rolls out a policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.
        randomize_initial_state: Perform random steps after resetting to
            simulate a noisy initial state.
    Returns:
        sample: A Sample object.
    Raises:
        Exception: If the environment reports `done` before the final
            time step.
    """
    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))

    # Get a new sample
    sample = Sample(self)

    def act_and_store(step):
        # The full noise array is handed to the policy together with the
        # time index.
        action = policy.act(
            sample.get_X(step), sample.get_obs(step), step, noise)
        sample.set(ACTION, action, step)

    # Get initial state
    seed = None if reset_cond is None else self.x0[reset_cond]
    self.env.seed(seed)
    obs = self.env.reset()
    if randomize_initial_state > 0:
        # Apply one random action for a few simulator steps, then read
        # the observation with a zero action.
        action_span = self.env.action_space.high - self.env.action_space.low
        random_action = (action_span / 12 * np.random.normal(size=self.dU)
                         * randomize_initial_state)
        self.env._set_action(random_action)
        for _ in range(5):
            self.sim.step()
        obs = self.env.step(np.zeros(self.dU))[0]

    self.set_states(sample, obs, 0)
    act_and_store(0)
    for t in range(1, self.T):
        if self.render:
            self.env.render(mode='human')

        # Apply the previous action, then record state and next action.
        obs, _, done, _ = self.env.step(sample.get_U(t - 1))
        self.set_states(sample, obs, t)
        act_and_store(t)

        if done and t < self.T - 1:
            raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))

    if save:
        self._samples[condition].append(sample)
    return sample
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    Each of the T steps reads the sensors, computes the end-effector point
    Jacobians, queries and applies the (clipped) action, and then waits so
    that steps are spaced `self.dt` seconds apart.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.
    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    sample = Sample(self)
    self.reset(reset_cond)

    # Express the target in the same units as the stored sensor data:
    # scaled when a scaler is configured, raw otherwise. Previously the
    # scaler was dereferenced here even when it was None.
    if self.scaler is not None:
        ee_points_tgt = self.ee_points_tgt / self.scaler.scale_[-9:]
    else:
        ee_points_tgt = self.ee_points_tgt

    # Execute policy over a time period of [0,T]
    start = time.time()
    for t in range(self.T):
        # Read sensors and store sensor data in sample
        latest_sample = self.get_data()
        for sensor_type in self.x_data_types:
            data = latest_sample.get(sensor_type)
            if self.scaler is not None:
                data = self.__transform(sensor_type, data)
            sample.set(sensor_type, data, t)

        # Compute site Jacobians: start from the first three rows of
        # self.jac repeated per point, then add the cross-product term of
        # the remaining rows with the rotated point offset.
        jac = np.tile(self.jac[:3], (3, 1))
        rotation = sp.spatial.transform.Rotation.from_euler(
            "XYZ", -latest_sample.get(END_EFFECTOR_ROTATIONS))
        for i in range(3):
            rot_ee = rotation.apply(self.ee_points[i])
            for k in range(6):
                jac[i * 3:(i + 1) * 3, k] += np.cross(self.jac[3:, k], rot_ee)
        sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=t)

        # Use END_EFFECTOR_POINTS as distance to target
        sample.set(END_EFFECTOR_POINTS,
                   sample.get(END_EFFECTOR_POINTS, t) - ee_points_tgt,
                   t=t)

        # Get action
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
        U_t = np.clip(U_t, -4, 4)

        # Perform action
        self.reset_arm(None, None, U_t, False)
        sample.set(ACTION, U_t, t)

        # Check if agent is keeping up
        sleep_time = start + (t + 1) * self.dt - time.time()
        if sleep_time < 0:
            # sleep_time is negative here; report the lag as a positive
            # duration.
            logging.critical("Agent can't keep up. %fs behind.", -sleep_time)
        elif sleep_time < self.dt / 2:
            logging.warning("Agent may not keep up (%.0f percent busy)",
                            ((self.dt - sleep_time) / self.dt) * 100)

        # Wait for next timestep
        if sleep_time > 0:
            time.sleep(sleep_time)

    if save:
        self._samples[condition].append(sample)
    self.reset(reset_cond)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset the agent, roll out a policy and collect a sample; afterwards
    refresh the diagnostic plots and, if enabled, the rollout video.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # A non-None TfPolicy means the user has tf installed.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    self.reset()
    self.all_object_traj = {
        target: np.empty((0, 3)) for target in self.target_ids
    }
    if self.take_video:
        self.rgb_writer = create_writer(
            self._hyperparams['data_files_dir'],
            classifier='itr_{}'.format(self.idx_curr_itr),
            fps=5)

    X, new_sample = self._init_sample(condition)
    b_X = self._hyperparams['x0'][condition]  # looked up, not used below
    U = np.zeros([self.T, self.dU])

    # Generate noise.
    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))
    noise /= 1

    # Take the sample.
    for t in range(self.T):
        curr_time = rospy.get_time()
        b_U = policy.act(
            new_sample.get_X(t=t), new_sample.get_obs(t=t), t, noise[t, :])
        # U receives the action before the clipping below is applied.
        U[t, :] = b_U
        if (t + 1) % 10 == 0:
            print('sample policy action at t={}: {}'.format(t, b_U))

        velocity_limit = self._hyperparams['max_velocity']
        b_U[0:3] = np.clip(b_U[0:3], -velocity_limit, velocity_limit)  # clip task space
        b_U[3] = np.clip(b_U[3], -0.4, 0.4)  # clip rotation
        b_U[-1] = np.clip(b_U[-1], -30, 30)  # clip gripper
        b_U *= self._hyperparams['set_action_to_zero']

        if (t + 1) < self.T:
            self._step_taskspace(b_U, X, curr_time, t)
            X = self._set_sample(new_sample, t, condition)

    # open gripper at reset (100 is fully opened)
    self._trial_gripper.command_position(
        self._hyperparams['gripper_reset_position']
    )
    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)

    xx, image = self._get_current_state(t)
    if self.take_video:
        self.rgb_writer.append_data(image)

    # Redraw the two x/y/z distance panels from the recorded trajectories.
    xyz_panels = (
        ((1, 0), 'ee_to_anchor_distance', self.geom_dist_ee_to_anchor_traj),
        ((1, 1), 'object2_to_anchor_distance',
         self.geom_dist_object2_to_anchor_traj),
    )
    for position, title, trajectory in xyz_panels:
        self.axes[position].clear()
        self.axes[position].set_title(title)
        for column, (colour, label) in enumerate(
                (('r', 'x'), ('g', 'y'), ('b', 'z'))):
            self.axes[position].plot(
                np.array(trajectory)[:, column], c=colour, label=label)
        self.axes[position].legend()

    self.axes[3, 1].clear()
    self.axes[3, 1].set_title('pixel_feature_state')
    self.axes[3, 1].plot(np.array(self.feature_traj), c='r')
    self.axes[3, 1].legend()

    # Start the next rollout with empty trajectory buffers.
    self.geom_dist_ee_to_anchor_traj = []
    self.geom_dist_object2_to_anchor_traj = []
    self.feature_traj = []

    print('finished rollout {}'.format(self.idx_curr_rollout + 1))
    self.idx_curr_rollout += 1
    if self.take_video:
        self.rgb_writer.close()
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=False,
           screenshot_prefix=None, superball_parameters=None):
    """
    Roll out a policy and collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition's sample list receives the result.
        verbose: Published as the '/verbose_trial' ROS parameter.
        save: Whether or not to store the trial into the samples. Forced
            to False when a custom horizon is given.
        noisy: NOTE(review): not read by this method; noise is always
            generated.
        screenshot_prefix: If set, a screenshot is saved after every
            simulated step under this file-name prefix.
        superball_parameters: Overrides applied on top of a copy of
            SUPERBALL_DEFAULT_SAMPLE_PARAMETERS.
    Returns:
        The sample that was filled in during the trial.
    """
    overrides = {} if superball_parameters is None else superball_parameters
    sample_params = copy.deepcopy(SUPERBALL_DEFAULT_SAMPLE_PARAMETERS)
    sample_params.update(overrides)

    rospy.set_param('/verbose_trial', int(verbose))
    if screenshot_prefix:
        import pyscreenshot

    horizon = sample_params['horizon']
    if horizon is None:
        horizon = self.T
    else:
        # We don't save the sample if the horizon is customly defined
        save = False

    gain = sample_params['motor_position_control_gain']  # not used below

    # Reset or relax
    if sample_params['reset']:
        self.reset(0, sample_params['bottom_face'],
                   sample_params['start_motor_positions'])
    elif sample_params['relax']:
        self.relax()

    new_sample = self._init_sample(horizon)
    U = np.zeros([horizon, self.dU])
    noise = generate_noise(horizon, self.dU, self._hyperparams)

    for t in range(horizon):
        state = new_sample.get_X(t=t)
        observation = new_sample.get_obs(t=t)
        action = policy.act(state, observation, t, noise[t, :])

        if sample_params['debug'] and t >= horizon - 1:
            # Dump a 12-entry slice of the final state as a list literal.
            state_dim = state.shape[0]
            sys.stdout.write('[')
            for elem in state[state_dim - 24:state_dim - 12]:
                sys.stdout.write('{}, '.format(elem))
            sys.stdout.write('],\n')

        U[t, :] = action
        if (t + 1) >= horizon:
            continue

        if self._hyperparams['ctrl_vel']:
            self._set_motor_velocities(action)
        else:
            self._set_motor_positions(action)
        self._advance_simulation()
        if screenshot_prefix:
            img = screenshot_prefix + '_' + str(t).zfill(3) + '.png'
            pyscreenshot.grab(bbox=(65, 50, 705, 530)).save(img)
        self._set_sample(new_sample, t)

    new_sample.set(ACTION, U)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.

    Every fourth call (by `self.vid_seqname`) the rollout frames are also
    written through the writer returned by `self.create_writers()`.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    if self.vid_seqname % 4 == 0:
        rgb_writer = self.create_writers()
    else:
        rgb_writer = None
    ## RESET ROBOT!!!! <--- implement that!!
    self.env.reset(self.reset_condition)
    new_sample, image_data = self._init_sample(condition)
    #mj_X = self._hyperparams['x0'][condition] # = b_X
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))
    noise *= 1  # scale factor for the noise; currently a no-op
    gripper_persistency_counter = 0
    allow_gripper_change = False
    # Take the sample.
    for t in range(self.T):  # 100 steps
        if rgb_writer is not None:
            rgb_writer.append_data(image_data[0])
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])  # get actions from policy
        # mj_U[:3] /= 10
        # mj_U[3] /= 10000
        # mj_U[0] = 0
        mj_U[3] = 0  # the fourth action component is always zeroed
        eepos = p.getLinkState(
            self.o.kukaobject.kukaId,
            self.o.kukaobject.kukaEndEffectorIndex)[
                0]  # is that correct?? or is it another index??
        if eepos[2] + mj_U[2] < 0.8200000357627868:  # Prevent hitting table
            mj_U[2] = 0
        # NOTE(review): this test adds mj_U[2] to eepos[1] but zeroes
        # mj_U[1]; possibly mj_U[1] was intended in the sum -- confirm.
        if eepos[1] + mj_U[2] > 0.000000357627868:  # Prevent going too far
            mj_U[1] = 0
        # NOTE(review): `self.hyperparams` here vs `self._hyperparams`
        # elsewhere in this method -- confirm both attributes exist.
        delta = self.hyperparams['delta_taskspace']
        self.taskspace_deltas = np.array([delta, delta, delta])
        # mj_U = np.clip(mj_U, -delta, delta)
        # Rescale the whole action so its Euclidean norm is at most delta.
        norm = np.sqrt((np.sum(mj_U**2)))
        if norm >= delta:
            mj_U = mj_U * delta / norm
            print("saturated action")
        norm_check = np.sqrt((np.sum(mj_U**2)))  # not used below
        # print(mj_U)
        #mj_U = np.clip(mj_U, -self.taskspace_deltas, self.taskspace_deltas)
        U[t, :] = mj_U
        #mj_U[7] *= 7
        # Once the counter is between 11 and 49 a gripper change is
        # allowed and the counter jumps to 60, so with a start value of 0
        # this fires once per rollout.
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source; confirm the jump to 60 belongs inside the `if`.
        if gripper_persistency_counter > 10 and gripper_persistency_counter < 50:
            allow_gripper_change = True
            gripper_persistency_counter = 60
        gripper_persistency_counter += 1
        # check if gripper is in same position as last time step
        if (t + 1) < self.T:
            curr_time = time.time()
            # print("step {}".format(t))
            ### step simulation with mj_X and mj_U
            if self._hyperparams['control_type'] == 'task':
                self.step_taskspace_trans(mj_U, allow_gripper_change)
                allow_gripper_change = False
            else:
                self.step_jointspace(mj_U)
            # Open the gripper during the last simulated steps.
            if t >= self.T - 3:
                self.o.kukaobject.open_gripper()
            #print(np.linalg.norm(object_p3d[:3]))
            #print(object_p3d[:3])
            run_time = time.time() - curr_time
            #print("runtime: {}".format(run_time))
            stateX, jac_t, image_data = self.get_state(t)
            # Sleep for whatever is left of the dt budget after stepping.
            time.sleep(max(self._hyperparams['dt'] - run_time, 0.0))
            self._set_sample(
                new_sample, stateX, jac_t, t,
                condition)  # is jac_t correct or should it be jac_r??
    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    self.vid_seqname += 1
    if rgb_writer is not None:
        rgb_writer.close()
    # print("distance target to anchor: {}, ground truth distance: {}".format(np.linalg.norm(new_sample.get(OBJECT_POSE, t=self.T-1)), np.linalg.norm(self.hyperparams['debug_cost_tgt'][-1])))
    return new_sample
def sample(self, policy, condition, iteration, verbose=True, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    The rollout is retried from the start whenever the robot reports a
    collision or is found disabled. The raw and clipped action arrays are
    written to .npy files after every attempt.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        iteration: Appended to the names of the saved action files.
        verbose: Whether or not to plot the trial (not read in this method).
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The sample that was filled in during the trial.
    """
    # Create new sample, populate first time step.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    # noisy = False
    ## TODO : where below line should be located?
    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])  # actions after torque clipping
    U_origin = np.zeros([self.T, self.dU])  # actions as returned by the policy
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))
    # NOTE(review): `.shape` is read on the un-indexed x0var container and
    # randn is given a tuple; this branch looks like it would raise when
    # taken -- confirm.
    if np.any(self._hyperparams['x0var'][condition] > 0):
        x0n = self._hyperparams['x0var'] * \
            np.random.randn(self._hyperparams['x0var'].shape)
        mj_X += x0n
    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i in range(len(noisy_body_idx)):
            idx = noisy_body_idx[i]
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)
    # Torque limits: the first applies to action entries 0-3, the second
    # to the remaining entries.
    torq_max1 = 45  # origin 87
    torq_max2 = 6  # origin 12
    # self.panda.set_force_threshold_for_collision([20, 20, 10, 25, 25, 25]) # X,Y,Z,R,P,Y
    self.panda.set_collision_threshold(
        cartesian_forces=[20, 20, 10, 25, 25, 25])  # X,Y,Z,R,P,Y
    # Retry loop: StopIteration is used as the "abort this attempt"
    # signal; a completed attempt leaves through `break`.
    while True:
        # panda : move to joint position
        ## TODO : where below line should be located?
        # new_sample = self._init_sample(condition, feature_fn=feature_fn) # new_sample: class 'Sample'
        try:
            # self.panda.enable_robot()
            if not self.panda.is_enabled_robot():
                raise StopIteration
            self.panda.move_to_joint_position(
                self._hyperparams['x0'][condition][0:7])
            time.sleep(2)
            # Take the sample.
            for t in range(self.T):
                X_t = new_sample.get_X(t=t)
                obs_t = new_sample.get_obs(t=t)
                mj_U = policy.act(X_t, obs_t, t, noise[t, :])
                mj_U_origin = mj_U.copy()
                # Clamp each torque in place to its limit.
                for i in range(len(mj_U)):
                    if i < 4 and mj_U[i] > torq_max1:
                        mj_U[i] = torq_max1
                    elif i < 4 and mj_U[i] < -torq_max1:
                        mj_U[i] = -torq_max1
                    elif i >= 4 and mj_U[i] > torq_max2:
                        mj_U[i] = torq_max2
                    elif i >= 4 and mj_U[i] < -torq_max2:
                        mj_U[i] = -torq_max2
                U[t, :] = mj_U
                U_origin[t, :] = mj_U_origin
                # print 'mj_U: ', mj_U
                # print 'mj_U dict: ', self.list_to_dict(mj_U)
                if (t + 1) < self.T:
                    # self.panda.enable_robot()
                    # for _ in range(self._hyperparams['substeps']):
                    if self.panda.has_collided():
                        raise StopIteration
                    if not self.panda.is_enabled_robot():
                        raise StopIteration
                    # panda move with mj_U
                    # self.panda.set_joint_velocities(self.list_to_dict(mj_U))
                    # self.panda.exec_velocity_cmd(mj_U)
                    self.panda.exec_torque_cmd(mj_U)
                    # self.panda.exec_position_cmd(mj_U)
                    print("current step(t): ", t)
                    self._set_sample(new_sample, mj_X, t, condition, feature_fn=feature_fn)
                    self.r.sleep()  # to sample data at some frequency
            for i in range(
                    15
            ):  # Torque commands for allowing robot finish the trajectory
                self.panda.exec_torque_cmd([0, 0, 0, 0, 0, 0, 0])
            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # source; confirm this sleep is after, not inside, the loop.
            time.sleep(1)
            break
        except StopIteration:
            print("robot stopped!!!")
            self.panda.enable_robot()
            time.sleep(2)
            continue
        finally:
            # Runs after every attempt, including aborted ones and the
            # final one that leaves through `break`.
            f = '/home/panda_gps/gps/experiments/panda_test_dongju/action_origin_' + str(
                iteration) + '.npy'
            np.save(f, U_origin)
            f = '/home/panda_gps/gps/experiments/panda_test_dongju/action_clipped_' + str(
                iteration) + '.npy'
            np.save(f, U)
    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(
    self,
    policy,
    condition,
    verbose=True,
    save=True,
    noisy=True,
    use_TfController=False,
    timeout=None,
    reset_cond=None,
    record=False
):
    """
    Reset the environment, roll out a policy and collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        use_TfController: Whether to use the syncronous TfController
            (not read in this method).
        timeout: Not read in this method.
        reset_cond: The initial condition whose x0 entry seeds the
            environment; None passes a None seed.
        record: Value returned by the environment's video callable.
    Returns:
        sample: A Sample object.
    Raises:
        Exception: If the environment reports `done` before the final
            time step.
    """
    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))

    # Get a new sample
    sample = Sample(self)

    def act_and_store(step):
        # The full noise array is handed to the policy together with the
        # time index.
        action = policy.act(
            sample.get_X(step), sample.get_obs(step), step, noise)
        sample.set(ACTION, action, step)

    self.env.video_callable = lambda episode_id, record=record: record

    # Get initial state
    seed = None if reset_cond is None else self.x0[reset_cond]
    self.env.seed(seed)
    obs = self.env.reset()
    if self._hyperparams.get('initial_step', 0) > 0:
        # Take one random step to get a slightly random initial state
        # distribution
        action_span = self.env.action_space.high - self.env.action_space.low
        U_initial = (action_span / 12 * np.random.normal(size=self.dU)
                     * self._hyperparams['initial_step'])
        obs = self.env.step(U_initial)[0]

    self.set_states(sample, obs, 0)
    act_and_store(0)
    for t in range(1, self.T):
        if not record and self.render:
            self.env.render(mode='human')  # TODO add hyperparam

        # Apply the previous action, then record state and next action.
        obs, _, done, _ = self.env.step(sample.get_U(t - 1))
        self.set_states(sample, obs, t)
        act_and_store(t)

        if done and t < self.T - 1:
            raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))

    if save:
        self._samples[condition].append(sample)
    self.active = False
    return sample
def sample(self, policy, condition, iteration, verbose=True, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        iteration: Not read in this method.
        verbose: Whether or not to plot the trial (not read in this method).
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The sample that was filled in during the trial.
    """
    # Create new sample, populate first time step.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state. The variance is looked up per condition
    # (matching the guard) and randn receives the shape unpacked, since it
    # takes the dimensions as separate integers rather than a tuple.
    # NOTE(review): `+=` modifies the stored x0 entry in place when it is
    # a NumPy array.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        x0n = x0var * np.random.randn(*np.shape(x0var))
        mj_X += x0n

    # Perturb the positions of the bodies listed as noisy.
    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)

    # Move the indy robot to its initial position. The key was previously
    # written with typographic quotes, which is a syntax error.
    self.indy.joint_move_to(self._hyperparams['x0'][condition][0:6])
    time.sleep(2)

    # Take the sample.
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])
        U[t, :] = mj_U
        if (t + 1) < self.T:
            # TODO: send a torque command to the indy robot; for now the
            # action is passed to joint_move_to.
            self.indy.joint_move_to(mj_U)
            print("current step(t): ", t)
            self._set_sample(new_sample, mj_X, t, condition,
                             feature_fn=feature_fn)
            self.r.sleep()  # to sample data at some frequency
    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    The rollout is repeated until the operator answers 'y' to the
    'Continue?' prompt (the prompt is skipped when self.debug is set).

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.
        **kwargs: Accepted for interface compatibility; not read here.
    Returns:
        sample: A Sample object.
    """
    # Get a new sample
    sample = Sample(self)
    sample_ok = False
    # The same Sample object is refilled on every repeat, so it must be
    # appended to the sample list only once.
    saved = False
    while not sample_ok:
        if not self.debug:
            self.reset(reset_cond)
        self.__init_opcua()

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = None

        # Execute policy over a time period of [0,T]
        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in sample
            def store_sensor(sensor):
                sample.set(sensor, self.read_sensor(sensor), t)

            self.pool.map(store_sensor, self.sensors)

            # Override sensors
            for override in self.sensor_overrides:
                if override['condition'](t):
                    sensor = override['sensor']
                    sample.set(sensor, override['value'](sample, t), t)

            print('X_%02d' % t, sample.get_X(t))

            # Get action
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

            # Override actuators
            for override in self.actuator_overrides:
                if override['condition'](t):
                    actuator = override['actuator']
                    U_t[self._u_data_idx[actuator]] = np.copy(override['value'])

            # Send signals
            self.send_signals(t)

            # Perform action
            for actuator in self._u_data_idx:
                self.write_actuator(actuator, U_t[self._u_data_idx[actuator]])
            sample.set(ACTION, U_t, t)

            print('U_%02d' % t, U_t)

            # Check if agent is keeping up; sleep_time is the slack left
            # before the next time step is due (negative when late).
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical("Agent can't keep up. %fs behind." % sleep_time)
            elif sleep_time < self.dt / 2:
                logging.warning(
                    "Agent may not keep up (%.0f percent busy)" %
                    (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for next timestep
            if sleep_time > 0 and not self.debug:
                time.sleep(sleep_time)

        if save and not saved:
            self._samples[condition].append(sample)
            saved = True
        self.finalize_sample()

        sample_ok = self.debug or input('Continue?') == 'y'
        if not sample_ok:
            print('Repeating')
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial on the Baxter robot and constructs a new sample containing
    information about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        verbose: Not read by this method.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The new sample, with ACTION and NOISE set.
    """
    # Create new sample, populate first time step.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state when a variance is configured for this
    # condition. randn takes its dimensions as separate arguments, and the
    # sum is built out of place so the configured x0 is left untouched.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        mj_X = mj_X + x0var * np.random.randn(*np.shape(x0var))

    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)

    # Move Baxter to the initial joint positions of this condition.
    self.baxter.move_baxter_to_joint_positions(
        self._hyperparams['x0'][condition][0:7])

    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        # Get the action for this step from the policy and apply it.
        print(policy)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :], condition)
        print(mj_U)
        U[t, :] = mj_U
        # Every step but the last.
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                # Set the Baxter joint velocities through the Baxter API.
                self.baxter.set_baxter_joint_velocities(mj_U)
            # Single-argument print with a pre-formatted string gives the
            # same output under Python 2 and Python 3.
            print("\ncurrent step(t):  %s" % (t,))
            # NOTE(review): mj_X is never refreshed inside this loop --
            # confirm that _set_sample reads the robot state itself.
            self._set_sample(new_sample, mj_X, t, condition, feature_fn=feature_fn)

    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Not read by this method (storing the sample is disabled).
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    rgb_writer = self.create_writers()
    # Fixed reset pose: position followed by orientation.
    orn = [0.70603903128, 0.708148792076, 0, 0]
    pos = [1.0, -0.400000, 0.9]
    self.env.reset(pos + orn)

    new_sample, image_data = self._init_sample(condition)
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # The flag was read before ever being assigned, which raised a NameError
    # on the first task-space step. It starts True and is cleared after the
    # first step, matching the assignment inside the loop.
    # NOTE(review): confirm True is the intended initial value.
    allow_gripper_change = True

    # Take the sample.
    for t in range(self.T):
        if rgb_writer is not None:
            rgb_writer.append_data(image_data[0])
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])  # get actions from policy
        U[t, :] = mj_U

        # Clip the action against the current end-effector position.
        eepos = p.getLinkState(
            self.o.kukaobject.kukaId,
            self.o.kukaobject.kukaEndEffectorIndex)[0]
        deltas = self._hyperparams['delta_taskspace']
        mj_U = self.clip_actions(eepos, mj_U, deltas)

        if (t + 1) < self.T:
            curr_time = time.time()
            if self._hyperparams['control_type'] == 'task':
                self.step_taskspace_trans(mj_U, allow_gripper_change)
                allow_gripper_change = False
            else:
                self.step_jointspace(mj_U)
            stateX, jac_t, image_data = self.get_state(t)
            # Sleep for whatever is left of the control period.
            run_time = time.time() - curr_time
            time.sleep(max(self._hyperparams['dt'] - run_time, 0.0))
            self._set_sample(new_sample, stateX, jac_t, t, condition)

    # NOTE(review): setting ACTION/NOISE on the sample and appending it to
    # self._samples were commented out in the original and remain disabled.
    self.vid_seqname += 1
    if rgb_writer is not None:
        rgb_writer.close()
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples. Only
            honoured on the non-TF path.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    if TfPolicy is not None:  # user has tf installed.
        if isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

    self.reset(condition)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Execute trial.
    trial_command = TrialCommand()
    trial_command.id = self._get_next_seq_id()
    trial_command.controller = policy_to_msg(policy, noise)
    trial_command.T = self.T
    # The id is requested a second time, as before; this later value is the
    # one that is sent with the command.
    trial_command.id = self._get_next_seq_id()
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = \
        self._hyperparams['ee_points_tgt'][condition].tolist()
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['obs_include']

    if self.use_tf is False or not isinstance(policy, TfPolicy):
        # Parenthesised single-argument print works on Python 2 and 3.
        print('Not using TF controller')
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout']
        )
        sample = msg_to_sample(sample_msg, self)
        if save:
            self._samples[condition].append(sample)
        return sample

    # TF controller path: the trial manager drives the policy while the
    # trial service collects the resulting sample message.
    self.trial_manager.prep(policy, condition)
    self._trial_service.publish(trial_command, wait=True)
    self.trial_manager.run(self._hyperparams['trial_timeout'])
    while self._trial_service._waiting:
        print('Waiting for sample to come in')
        rospy.sleep(1.0)
    sample_msg = self._trial_service._subscriber_msg

    sample = msg_to_sample(sample_msg, self)
    sample.set(NOISE, noise)
    sample.set(TIMESTEP, np.arange(self.T).reshape((self.T, 1)))
    # NOTE(review): `save` is not honoured on this path -- confirm whether
    # the sample should also be appended to self._samples here.
    return sample
def sample(self, policy, condition, verbose=False, save=True, noisy=True):
    """
    Runs one trial in the simulator and builds a sample describing it.

    Args:
        policy: Policy to be used in the trial.
        condition (int): Which condition setup to run.
        verbose (boolean): Whether or not to plot the trial (not used here).
        save (boolean): Whether or not to store the trial into the samples.
        noisy (boolean): Whether or not to use noise during sampling.
    Returns:
        The new sample, with ACTION set to the T x dU action array.
    """
    print('taking sample')
    self.restart_simulation()

    # Seed the sample with the simulator state at the first time step.
    trial = self._init_sample(self.retrieve_state())
    actions = np.zeros([self.T, self.dU])
    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))

    for t in range(self.T):
        state_t = trial.get_X(t=t)
        obs_t = trial.get_obs(t=t)
        actions[t, :] = policy.act(state_t, obs_t, t, noise[t, :])

        # Nothing is simulated after the final action.
        if (t + 1) >= self.T:
            continue

        # Repeat the action for the configured number of sub-steps.
        for _ in range(self._hyperparams['substeps']):
            self.step_simulation(actions[t, :])
        self._set_sample(trial, self.retrieve_state(), t)

    self.stop_simulation()
    trial.set(ACTION, actions)
    if save:
        self._samples[condition].append(trial)
    return trial
def sample(self, policy, condition, verbose=True, save=True, noisy=True,
           use_TfController=False, first_itr=False, timeout=None, reset=True,
           rnd=None):
    """
    Reset and execute a policy and collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        use_TfController: Whether to use the synchronous TfController.
        first_itr: Not read by this method.
        timeout: Trial length sent with the command; self.T when None.
        reset: Whether to reset the agent before the trial.
        rnd: Forwarded to self.reset.
    Returns:
        sample: A Sample object.
    """
    if use_TfController:
        self._init_tf(policy, policy.dU)
        self.use_tf = True

    # Bookkeeping kept on the agent for the duration of the trial.
    self.cur_timestep = 0
    self.sample_save = save
    self.active = True
    self.policy = policy

    if reset:
        self.reset(condition, rnd=rnd)
    self.condition = condition

    # Generate noise; self.noise stays None for a noise-free trial.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
        self.noise = noise
    else:
        noise = np.zeros((self.T, self.dU))
        self.noise = None

    # Fill in trial command
    hyper = self._hyperparams
    command = TrialCommand()
    command.id = self._get_next_seq_id()
    command.controller = policy_to_msg(
        policy, noise, use_TfController=use_TfController)
    command.T = self.T if timeout is None else timeout
    command.id = self._get_next_seq_id()
    command.frequency = hyper['frequency']
    ee_offsets = hyper['end_effector_points']
    command.ee_points = ee_offsets.reshape(ee_offsets.size).tolist()
    command.ee_points_tgt = hyper['ee_points_tgt'][self.condition].tolist()
    command.state_datatypes = hyper['state_include']
    command.obs_datatypes = hyper['state_include']

    # Execute trial.
    reply = self._trial_service.publish_and_wait(
        command, timeout=(command.T + hyper['trial_timeout']))
    if self.vision_enabled:
        reply = self.add_rgb_stream_to_sample(reply)

    result = msg_to_sample(reply, self)
    if save:
        self._samples[condition].append(result)
    self.active = False
    return result
def sample(self, itr, policy, condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial on the Baxter robot, constructs a new sample containing
    information about the trial, and dumps the observed feature points,
    images and observations to a compressed .npz file.

    Args:
        itr: Iteration number, used to name the data file.
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        verbose: Not read by this method.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The new sample, with ACTION and NOISE set.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    img = []
    fp = []
    obs = []

    # Create new sample, populate first time step.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    mj_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state when a variance is configured for this
    # condition. randn takes its dimensions as separate arguments, and the
    # sum is built out of place so the configured x0 is left untouched.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        mj_X = mj_X + x0var * np.random.randn(*np.shape(x0var))

    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)

    # Move Baxter to the initial joint positions of this condition.
    self.baxter.move_baxter_to_joint_positions(
        self._hyperparams['x0'][condition][0:7])

    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        print(obs_t.shape)
        obs.append(obs_t)
        # Get the action for this step from the policy and apply it.
        mj_U = policy.act(X_t, obs_t, t, noise[t, :], condition)
        U[t, :] = mj_U
        # Every step but the last.
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                # Set the Baxter joint velocities through the Baxter API.
                self.baxter.set_baxter_joint_velocities(mj_U)
            # Pre-formatted strings keep the output identical on Python 2
            # and Python 3.
            print("current step(t):  %s" % (t,))
            self._set_sample(new_sample, mj_X, t, condition, feature_fn=feature_fn)
        # NOTE(review): the nesting of the statements below was lost in the
        # collapsed source; they are placed at loop level so fp/img get one
        # entry per time step like obs -- confirm.
        if t == 0:
            prompt('first time step end')
        fp.append(new_sample.get(IMAGE_FEAT, t))
        img.append(new_sample.get(RGB_IMAGE, t))

    fp = np.asarray(fp)
    img = np.asarray(img)
    obs = np.asarray(obs)

    # Save the observed feature points and images.
    path = '/hdd/gps-master/experiments/' + 'block_insert_new' + '/data_files/check_fp'
    if not os.path.exists(path):
        os.mkdir(path)
        print("%s  is created" % path)
    fname = path + '/fp_%d_%d.npz' % (itr, condition)
    np.savez_compressed(fname, fp=fp, img=img, obs=obs)

    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample, tracking a (possibly
    truncated and padded) reference trajectory.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # There are different trajectories based on if reset or not
    if self.reset_time:
        trajectories = self.reset_trajectories
    else:
        trajectories = self.trajectories

    if condition not in trajectories:  # If this hasn't been initialized yet
        if self.reset_time:
            self.init_reset_traj(condition, policy)
        else:
            self.compute_reference_trajectory(condition, policy)

    self.samples_taken[condition] += 1  # Increment number of samples taken

    # Every so many samples, take more of the trajectory,
    # unless we have reached the end or we are using varying T.
    if self.samples_taken[condition] % (self.num_samples * self.iter_per_seg) == 0 \
            and self.iter_count != 0 \
            and (self.T != self.final_T or self.varying_T):
        self.update_T_then_policy(policy, condition)

    # Make sure the T is correct for the condition we are on
    self.T = self.cur_T[condition]

    ref_traj_info = trajectories[condition]

    # Length of the full trajectory (kept from the original; not used below).
    traj_length = len(self.full_ref_ee[condition])

    if self.T == self.final_T:
        # We have gotten to the whole trajectory.
        ref_traj = self.trajectories[condition]['ee']
    else:
        # Otherwise truncate the reference and pad it with its last point.
        ref_traj = self.trajectories[condition]['ee'][:self.T - self.padding]
        ref_traj.extend(
            [self.trajectories[condition]['ee'][self.T - self.padding - 1]]
            * self.padding)

    print('The length of the trajectory we are currently using is ' + str(self.T))

    # Pre-formatted string keeps the output identical on Python 2 and 3.
    print('Sampling, condition %s' % (condition,))
    self.reset(condition)

    # Added from agent_ros.py of public gps codebase
    if TfPolicy is not None:  # user has tf installed.
        if isinstance(policy, TfPolicy):
            print('well this got called')
            self._init_tf(policy.dU)

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Execute trial.
    trial_command = TrialCommand()
    trial_command.controller = policy_to_msg(policy, noise)

    if self.samples_taken[condition] % self.num_samples == 0 \
            and self.samples_taken[condition] != 0:
        self.iter_count += 1  # This is the full count
        self.pickle_self()  # Pickle self and send to data files
        # NOTE(review): the nesting of this dump was lost in the collapsed
        # source; it is assumed to run once per iteration -- confirm.
        with open('iter' + str(self.iter_count) + '_cond' + str(condition)
                  + '.txt', 'w') as f:
            the_noise = np.zeros((self.T, self.dU))
            f.write(str(policy_to_msg(policy, the_noise)))

    trial_command.T = self.T
    trial_command.id = self._get_next_seq_id()
    trial_command.frequency = self._hyperparams['frequency']
    ee_points = self._hyperparams['end_effector_points']
    trial_command.ee_points = ee_points.reshape(ee_points.size).tolist()
    trial_command.ee_points_tgt = ref_traj[-1]
    trial_command.state_datatypes = self._hyperparams['state_include']
    trial_command.obs_datatypes = self._hyperparams['state_include']

    if self.use_tf is False:
        sample_msg = self._trial_service.publish_and_wait(
            trial_command, timeout=self._hyperparams['trial_timeout'])
    else:
        self._trial_service.publish(trial_command)
        sample_msg = self.run_trial_tf(
            policy, time_to_run=self._hyperparams['trial_timeout'])

    sample = msg_to_sample(sample_msg, self)
    # Reference points expressed relative to the final reference point.
    sample.set('target_traj_ee_points',
               [points - ref_traj[-1] for points in ref_traj])
    sample.set(REF_OFFSETS, ref_traj_info['offsets'][:self.T])
    sample.set(REF_TRAJ,
               np.array([ref_traj_info['offsets'].flatten()] * self.T))

    if save:
        self._samples[condition].append(sample)
    if self.varying_T:  # Only save this if you are going to use varying T
        self.saved_samples[condition].append(sample)
    self.reset(condition)
    return sample
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """Performs agent reset and rolls out given policy to collect a sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        reset_cond: The initial condition to reset the agent into.
        **kwargs: Accepted for interface compatibility; not read here.
    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    sample = Sample(self)
    self.reset(reset_cond)

    # Execute policy over a time period of [0,T]
    # TODO: Find better solution to change mode.
    # Relax arm to change mode to torque. If this is not done, the mode will
    # be changed in timestep t=0, causing the loop to be slow in timestep t=1
    # because the mutex in the cpp is locked.
    # (A stray triple quote that followed this comment was dropped: it would
    # have opened an unterminated string literal.)
    self.relax_arm()
    time.sleep(1)

    # TODO: find better solution to clip (same as in cpp)
    torque_limits_ = np.array([4.0, 4.0, 4.0, 4.0, 1.0, 1.0, .5])

    start = time.time()
    for t in range(self.T):
        # Read sensors and store sensor data in sample
        latest_sample = self.get_data()
        for sensor_type in self.x_data_types:
            sample.set(sensor_type, latest_sample.get(sensor_type), t)
        sample.set(END_EFFECTOR_POINT_JACOBIANS,
                   latest_sample.get(END_EFFECTOR_POINT_JACOBIANS), t=t)

        # Get action and clip it to the torque limits
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
        U_t = np.clip(U_t, -torque_limits_, torque_limits_)

        # Perform action
        self.reset_arm(None, None, U_t, False)
        sample.set(ACTION, U_t, t)

        # Check if agent is keeping up; sleep_time is the slack left before
        # the next time step is due (negative when late).
        sleep_time = start + (t + 1) * self.dt - time.time()
        if sleep_time < 0:
            logging.critical(
                "Agent can't keep up.In timestep %i it is %fs behind." %
                (t, sleep_time))
        elif sleep_time < self.dt / 2:
            logging.warning(
                "Agent may not keep up (%.0f percent busy)" %
                (((self.dt - sleep_time) / self.dt) * 100))

        # Wait for next timestep
        if sleep_time > 0:
            time.sleep(sleep_time)

    if save:
        self._samples[condition].append(sample)
    self.reset(reset_cond)
    return sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial and constructs a new sample containing information
    about the trial.

    Args:
        policy: Policy to be used in the trial.
        condition: Which condition setup to run.
        verbose: Not read by this method (rendering is disabled).
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The new sample, with ACTION and NOISE set.
    """
    # Create new sample, populate first time step.
    feature_fn = None
    if 'get_features' in dir(policy):
        feature_fn = policy.get_features
    new_sample = self._init_sample(condition, feature_fn=feature_fn)
    # Initial state of the world for this condition.
    mj_X = self._world[condition].reset()
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state when a variance is configured for this
    # condition. randn takes its dimensions as separate arguments, and the
    # sum is built out of place so the array returned by reset() is not
    # modified.
    x0var = self._hyperparams['x0var'][condition]
    if np.any(x0var > 0):
        mj_X = mj_X + x0var * np.random.randn(*np.shape(x0var))

    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        for i, idx in enumerate(noisy_body_idx):
            var = self._hyperparams['noisy_body_var'][condition][i]
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(1, 3)

    # Take the sample.
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy.act(X_t, obs_t, t, noise[t, :])
        U[t, :] = mj_U
        if (t + 1) < self.T:
            # The world returns a 4-tuple; only state and reward are used.
            mj_X, reward, _, _ = self._world[condition].step(mj_U)
            self._set_sample(new_sample, mj_X, reward, t, condition,
                             feature_fn=feature_fn)

    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
# sample: resets the world for `condition`, rolls out `policy` for self.T
# steps (advancing the world by `substeps` calls to run_next per step) and
# returns the new sample with ACTION set; the sample is appended to
# self._samples[condition] when `save` is true.
# While stepping, it consumes the world's `reach` flag and records
# self.reach_start / self.reach_end, from which self.finishing and
# self.finishing_time are derived.
# NOTE(review): this definition is collapsed onto two physical lines, so the
# nesting of the reach-tracking if/elif branches cannot be recovered from the
# text; restore it from the original formatting before changing the logic.
def sample(self, policy, condition, verbose=False, save=True, noisy=True): """ Runs a trial and constructs a new sample containing information about the trial. Args: policy: Policy to to used in the trial. condition (int): Which condition setup to run. verbose (boolean): Whether or not to plot the trial (not used here). save (boolean): Whether or not to store the trial into the samples. noisy (boolean): Whether or not to use noise during sampling. """ # reset the world and assign the initialized state to new_sample self._worlds[condition].run() self._worlds[condition].reset_world() b2d_X = self._worlds[condition].get_state() new_sample = self._init_sample(b2d_X) self.reach_start = None self.reach_end = None # initialize a dummy action sequence U = np.zeros([self.T, self.dU]) if noisy: noise = generate_noise(self.T, self.dU, self._hyperparams) else: noise = np.zeros((self.T, self.dU)) for t in range(self.T): X_t = new_sample.get_X(t=t) obs_t = new_sample.get_obs(t=t) U[t, :] = policy.act(X_t, obs_t, t, noise[t, :]) # print(U[t]) if (t+1) < self.T: for _ in range(self._hyperparams['substeps']): self._worlds[condition].run_next(U[t, :]) if self._worlds[condition].reach: self._worlds[condition].reach = False if self.reach_start == None: self.reach_start = t # print("reach_start", t) elif self.reach_end == None or t>self.reach_end: self.reach_end = t if t==self.T-2: # continue reaching till the end of series # print("reach_end", self.reach_end) period = self.reach_end - self.reach_start # print("reach period", period) if period > 3: self.finishing = True # self.finishing_time = self.reach_end self.finishing_time = self.reach_start elif self.reach_end == t-1 : # just leave # print("reach_end", self.reach_end) period = self.reach_end - self.reach_start # print("reach period", period) if period > 1: self.finishing = True # self.finishing_time = self.reach_end self.finishing_time = self.reach_start if self.finishing_time == 0: self.finishing_time = 1 b2d_X = 
self._worlds[condition].get_state() self._set_sample(new_sample, b2d_X, t) new_sample.set(ACTION, U) if save: self._samples[condition].append(new_sample) # if self.finishing: # print("agent_bus t= ", self.finishing_time) return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset and execute a policy and collect a sample.

    For the first five runs (tracked in self._hyperparams['curr_runs']) the
    scaled noise alone is used as the action instead of the policy output.
    If a step returns no state, the agent is reset and the whole trial is
    restarted from t = 0.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    if TfPolicy is not None:  # user has tf installed.
        if isinstance(policy, TfPolicy):
            self._init_tf(policy.dU)

    self.reset()

    new_sample = self._init_sample(condition)
    b_X = self._hyperparams['x0'][condition]
    U = np.zeros([self.T, self.dU])

    # Generate noise.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))
    noise /= 50

    # Take the sample.
    t = 0
    while t < self.T:
        curr_time = rospy.get_time()
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        if self._hyperparams['curr_runs'] < 5:
            # NOTE(review): the u0 lookup is immediately overwritten by the
            # noise row; kept as in the original.
            b_U = self._hyperparams['u0'][t, :]
            b_U = noise[t, :]
        else:
            b_U = policy.act(X_t, obs_t, t, noise[t, :])
        U[t, :] = b_U

        # Clip deltas to the given limits!
        b_U[:-1] = np.clip(b_U[:-1], -self.taskspace_deltas[:-1],
                           self.taskspace_deltas[:-1])
        b_U[-1] = np.clip(b_U[-1], -self.taskspace_deltas[-1],
                          self.taskspace_deltas[-1])

        if (t + 1) < self.T:
            b_X, b_U_check, image, rcnn_image = self._step_taskspace(
                b_U, X_t, curr_time)
            if b_X is None:
                # The step failed: reset and start the trial over.
                self.reset()
                rospy.sleep(0.5)
                new_sample = self._init_sample(condition)
                b_X = self._hyperparams['x0'][condition]
                U = np.zeros([self.T, self.dU])
                # Generate noise.
                if noisy:
                    noise = generate_noise(self.T, self.dU, self._hyperparams)
                else:
                    noise = np.zeros((self.T, self.dU))
                noise /= 50
                t = 0
                continue
            self._set_sample(new_sample, b_X, t, condition)
            new_sample.set(RGB_IMAGE, image, t=t + 1)
            # Store the RCNN output returned by the step; the camera image
            # was stored here a second time before.
            new_sample.set(RCNN_OUTPUT, rcnn_image, t=t + 1)
        t += 1

    if self._hyperparams['curr_runs'] < 5:
        self._hyperparams['curr_runs'] += 1

    new_sample.set(ACTION, U)
    new_sample.set(NOISE, noise)
    print("Took sample...")
    self.reset()
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    Reset the agent, run one trial with the given policy and return the
    resulting sample.

    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        sample: A Sample object.
    """
    # Initialise TF only when it is installed and the policy is a TfPolicy.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    # Reset the agent for this experiment condition.
    self.reset(condition)

    noise = (generate_noise(self.T, self.dU, self._hyperparams)
             if noisy else np.zeros((self.T, self.dU)))

    # Build the trial command message.
    hyper = self._hyperparams
    command = TrialCommand()
    command.id = self._get_next_seq_id()
    command.controller = policy_to_msg(policy, noise)
    command.T = self.T
    command.id = self._get_next_seq_id()
    command.frequency = hyper['frequency']
    ee_offsets = hyper['end_effector_points']
    command.ee_points = ee_offsets.reshape(ee_offsets.size).tolist()
    command.ee_points_tgt = hyper['ee_points_tgt'][condition].tolist()
    command.state_datatypes = hyper['state_include']
    command.obs_datatypes = hyper['state_include']

    if self.use_tf is False:
        # Local policy: publish the command and wait for the response.
        reply = self._trial_service.publish_and_wait(
            command, timeout=hyper['trial_timeout']
        )
    else:
        # TF policy: publish the command, then obtain the sample message
        # from run_trial_tf.
        self._trial_service.publish(command)
        reply = self.run_trial_tf(policy, time_to_run=hyper['trial_timeout'])

    result = msg_to_sample(reply, self)
    if save:
        self._samples[condition].append(result)
    return result
def merge_controller(self, policy_cur, alpha1, policy_prev, alpha2,
                     condition, verbose=True, save=True, noisy=True):
    """
    Runs a trial whose actions come from policy_cur.merge_act() and
    constructs a new sample containing information about the trial.

    Args:
        policy_cur: Policy whose merge_act() is called at every time
            step. If it has a get_features attribute, that is used as
            the feature function of the sample.
        alpha1: Forwarded to merge_act (presumably the weight of
            policy_cur; confirm against merge_act).
        policy_prev: Second policy, forwarded to merge_act.
        alpha2: Forwarded to merge_act (presumably the weight of
            policy_prev; confirm against merge_act).
        condition: Which condition setup to run.
        verbose: Whether or not to plot the trial.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
    Returns:
        The newly constructed sample.
    """
    # Create new sample, populate first time step.
    feature_fn = getattr(policy_cur, 'get_features', None)
    new_sample = self._init_sample(condition, feature_fn=feature_fn)

    # Work on a copy: perturbing the start state must not modify the
    # stored hyperparameter, or the noise would accumulate over trials.
    mj_X = np.array(self._hyperparams['x0'][condition])
    U = np.zeros([self.T, self.dU])
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Perturb the initial state. randn() takes the dimensions as
    # separate integers, so the shape tuple has to be unpacked.
    x0var = np.asarray(self._hyperparams['x0var'][condition])
    if np.any(x0var > 0):
        mj_X = mj_X + x0var * np.random.randn(*mj_X.shape)

    # Perturb the positions of the bodies listed for this condition.
    # NOTE(review): the changed model is not pushed to
    # self._world[condition] here; confirm whether that is intended.
    noisy_body_idx = self._hyperparams['noisy_body_idx'][condition]
    if noisy_body_idx.size > 0:
        noisy_body_var = self._hyperparams['noisy_body_var'][condition]
        for i, idx in enumerate(noisy_body_idx):
            var = noisy_body_var[i]
            # A length-3 draw can be added in place to a single row;
            # a (1, 3) draw cannot be broadcast into a (3,) row.
            self._model[condition]['body_pos'][idx, :] += \
                var * np.random.randn(3)

    # Take the sample.
    for t in range(self.T):
        X_t = new_sample.get_X(t=t)
        obs_t = new_sample.get_obs(t=t)
        mj_U = policy_cur.merge_act(policy_prev, alpha1, alpha2,
                                    X_t, obs_t, t, noise[t, :])
        U[t, :] = mj_U
        if verbose:
            self._world[condition].plot(mj_X)
        if (t + 1) < self.T:
            for _ in range(self._hyperparams['substeps']):
                mj_X, _ = self._world[condition].step(mj_X, mj_U)
            # TODO: Some hidden state stuff will go here.
            self._data = self._world[condition].get_data()
            self._set_sample(new_sample, mj_X, t, condition,
                             feature_fn=feature_fn)
    new_sample.set(ACTION, U)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample
def test_sample(self, policy, condition, verbose=True, save=False,
                noisy=False, length=200):
    """
    Reset the environment, roll out a policy and collect a sample in
    order to test the learned policy.

    Args:
        policy: A Policy object.
        condition: Index of the sample list the trial is appended to
            when save is set.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        length: Number of time steps allocated for the rollout; at
            most length - 1 environment steps are executed.
    Returns:
        A tuple (sample, episode_reward, episode_time): the collected
        Sample object, the sum of the rewards returned by the
        environment, and the elapsed wall-clock time in seconds.
    """
    # TfPolicy is only available when tf is installed.
    if TfPolicy is not None and isinstance(policy, TfPolicy):
        self._init_tf(policy.dU)

    t_begin = time.time()
    total_reward = 0.

    # Reset the state and seed the sample with it.
    _, init_state, _ = self.reset()
    new_sample = self._init_test_sample({'POS_FORCE': init_state}, length)

    actions = np.zeros([length, self.dU])

    # Generate noise.
    noise = (generate_noise(length, self.dU, self._hyperparams)
             if noisy else np.zeros((length, self.dU)))

    # Roll out the policy.
    for step in range(length - 1):
        print(
            " ========================= Step {} =========================".
            format(step))
        state_t = new_sample.get_X(t=step)
        obs_t = new_sample.get_obs(t=step)
        actions[step, :] = policy.act(state_t, obs_t, step, noise[step, :])
        # Clip to [-1, 1] and scale by the environment's action bound.
        env_action = (np.clip(actions[step, :], -1, 1)
                      * self._env.action_high_bound)
        print('gps_action:', env_action)

        # Execute the action in the environment.
        _, next_state, reward, done, is_safe, _ = \
            self._env.step(env_action, step)
        total_reward += reward

        self._set_sample(new_sample, {'POS_FORCE': next_state}, step + 1)

        # Stop early when the step was flagged unsafe or the episode ended.
        if is_safe is False or done:
            break

    elapsed = time.time() - t_begin

    new_sample.set(ACTION, actions)
    if save:
        self._samples[condition].append(new_sample)
    return new_sample, total_reward, elapsed