def sample(self, policy, condition, verbose=True, save=True, noisy=True):
    """
    This is the main method run when the Agent object is called by GPS.
    Draws a sample from the environment, using the specified policy and
    under the specified condition. If "save" is True, then append the
    sample object of type Sample to self._samples[condition].
    TensorFlow is not yet implemented (FIXME).
    """
    # Reset the arm to initial configuration at start of each new trial.
    self.reset(condition)

    # Generate noise to be used in the policy object to compute next state.
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Execute the trial.
    sample_data = self._run_trial(
        policy, noise, time_to_run=self._hyperparams['trial_timeout'])

    # Write trial data into sample object.
    sample = Sample(self)
    for sensor_id, data in sample_data.iteritems():
        sample.set(sensor_id, np.asarray(data))

    # Save the sample to the data structure. This is controlled by gps_main.py.
    if save:
        self._samples[condition].append(sample)
    return sample
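# A minimal usage sketch for the sample() method above, assuming an already
# constructed agent and policy (the helper name below is hypothetical, not
# from the source): each call resets the arm, injects exploration noise, runs
# one trial of length agent.T, and appends the result to
# agent._samples[condition].
import numpy as np

def collect_noisy_samples(agent, policy, condition, num_samples=5):
    """Draw num_samples rollouts and stack their state trajectories."""
    samples = [
        agent.sample(policy, condition, verbose=False, save=True, noisy=True)
        for _ in range(num_samples)
    ]
    # Shape (num_samples, T, dX); Sample.get_X() returns the state trajectory.
    return np.stack([s.get_X() for s in samples])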
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)

    # Initialize world/run kinematics
    #self._init(condition)

    # Initialize sample with stuff from _data
    # pdb.set_trace()
    # Get data from mj_world, condition-specific.
    data = self._world[condition].reset(self._full_init_state[condition])
    # data = self._world[condition].reset()
    sample.set(END_EFFECTOR_POINTS, data[0:8], t=0)  # Set _data in sample class
    sample.set(JOINT_VELOCITIES, data[8:17], t=0)
    sample.set(JOINT_ANGLES, data[17:20], t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, data[20:23], t=0)
    #sample.set(END_EFFECTOR_POINT_JACOBIANS, np.array(0.0), t=0)
    return sample
def _roll_out(self, pol, itr, cond, i):
    if self.use_mpc and itr > 0:
        T = self.agent.T
        M = self.mpc_agent.T
        N = int(ceil(T / (M - 1.)))
        X_t = self.agent.x0[cond]

        # Only run the forward pass once per condition,
        # because it is the same for all samples.
        if i == 0:
            # Note: At this time algorithm.prev = algorithm.cur,
            # and prev.traj_info already has x0mu, x0sigma.
            self.off_prior, _ = self.algorithm.traj_opt.forward(
                pol, self.algorithm.prev[cond].traj_info)
            self.agent.publish_plan(self.off_prior)

        if type(self.algorithm) == AlgorithmTrajOpt:
            pol_info = None
        else:
            pol_info = self.algorithm.cur[cond].pol_info

        for n in range(N):
            # Note: M-1 because action[M] = [0, 0].
            t_traj = n * (M - 1)
            reset = True if (n == 0) else False
            mpc_pol, mpc_state = self.algorithm.mpc[cond][i].update(
                n, X_t, self.off_prior, pol,
                self.algorithm.cur[cond].traj_info, t_traj, pol_info)
            self.agent.publish_plan(mpc_state, True)

            new_sample = self.mpc_agent.sample(
                mpc_pol, cond, reset=reset, noisy=True,
                verbose=(i < self._hyperparams['verbose_trials']))
            X_t = new_sample.get_X(t=M - 1)

        # Merge the MPC samples into one full-length sample for optimizing
        # the offline trajectory distribution.
        full_sample = Sample(self.agent)
        sample_lists = self.mpc_agent.get_samples(cond)
        keys = sample_lists[0]._data.keys()
        t = 0
        for sample in sample_lists:
            for m in range(sample.T - 1):
                for sensor in keys:
                    full_sample.set(sensor, sample.get(sensor, m), t)
                t = t + 1
                if t + 1 > T:
                    break
        self.agent._samples[cond].append(full_sample)

        # Clear agent samples.
        self.mpc_agent.clear_samples()
    else:
        self.agent.sample(
            pol, cond,
            verbose=(i < self._hyperparams['verbose_trials']))
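# A toy sketch (plain numbers, no GPS objects) of the segment arithmetic in
# _roll_out above: the offline horizon T is covered by N MPC rollouts of
# horizon M, each contributing M-1 usable steps because the final action of a
# rollout is zero and is dropped when the segments are merged. T and M below
# are illustrative values, not taken from the source.
from math import ceil

def mpc_segment_starts(T=100, M=21):
    """Return the t_traj offsets handed to mpc.update for each segment."""
    N = int(ceil(T / (M - 1.)))          # number of MPC rollouts needed
    return [n * (M - 1) for n in range(N)]

# mpc_segment_starts() -> [0, 20, 40, 60, 80], i.e. 5 rollouts cover steps 0..99.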
def _init_sample(self, T=None):
    """
    Construct a new sample and fill in the first time step.
    """
    sample = Sample(self, T)
    self._advance_simulation()
    for sensor in self._sensor_types:
        sample.set(sensor, self._sensor_readings[sensor], t=0)
    return sample
def msg_to_sample(ros_msg, agent):
    """
    Convert a SampleResult ROS message into a Sample Python object.
    """
    sample = Sample(agent)
    for sensor in ros_msg.sensor_data:
        sensor_id = sensor.data_type
        shape = np.array(sensor.shape)
        data = np.array(sensor.data).reshape(shape)
        sample.set(sensor_id, data)
    return sample
def msg_to_sample(ros_msg, agent):
    """
    Convert a SampleResult ROS message into a Sample Python object.
    """
    sample = Sample(agent)
    # Sensor_data
    #   int32 id
    #   DataType[] sensor_data
    for sensor in ros_msg.sensor_data:
        sensor_id = sensor.data_type
        shape = np.array(sensor.shape)
        data = np.array(sensor.data).reshape(shape)
        sample.set(sensor_id, data)  # Set trajectory data for a particular sensor.
    return sample
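# A small sketch of the convention both msg_to_sample variants rely on: each
# sensor entry in the message carries a flat data array plus its shape, and is
# restored with a reshape before being stored in the Sample. The namedtuple
# below is a hypothetical stand-in for the real DataType ROS message.
import numpy as np
from collections import namedtuple

FakeDataType = namedtuple('FakeDataType', ['data_type', 'shape', 'data'])

# E.g. a 3x3 block of end-effector point data, flattened row-major:
field = FakeDataType(data_type=3, shape=[3, 3],
                     data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
restored = np.array(field.data).reshape(np.array(field.shape))
assert restored.shape == (3, 3)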
def _init_sample(self, b2d_X):
    """
    Construct a new sample and fill in the first time step.
    """
    sample = Sample(self)
    self._set_sample(sample, b2d_X, -1)
    return sample
def _init_test_sample(self, b2d_X, length):
    """
    Construct a new sample and fill in the first time step.
    """
    sample = Sample(self, test=True, length=length)
    self._set_sample(sample, b2d_X, 0)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    """
    sample = Sample(self)
    self._set_sample(sample, condition, -1, feature_fn=feature_fn)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)
    t = -1
    stateX, jac_t, image_data = self.get_state(t)
    self._set_sample(sample, stateX, jac_t, t, condition)  # Is jac_t correct, or should it be jac_r?
    return sample, image_data
def _init_sample(self, b2d_X):
    """
    Construct a new sample and fill in the first time step.
    """
    sample = Sample(self)
    self._set_sample(sample, b2d_X, -1)
    feature_fn = None
    if RGB_IMAGE in self.obs_data_types:
        ## TODO: replace the line below with another function, e.g.:
        # ex 1:
        #   self.img = self.baxter.get_baxter_camera_image()
        #   sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t=0)
        # ex 2:
        #   sample.set(RGB_IMAGE, img_data, t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    if IMAGE_FEAT in self.obs_data_types:
        raise ValueError(
            'Image features should not be in observation, just state')
    if feature_fn is not None:
        # Assumes that the rest of the sample has been populated.
        obs = sample.get_obs()
        sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
    else:
        sample.set(IMAGE_FEAT,
                   np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)),
                   t=0)
    return sample
def iteration(self, sample_lists, itr, train_gcm=False):
    """
    Run iteration of MDGPS-based guided policy search.
    Args:
        sample_lists: List of SampleList objects for each condition.
        itr: Iteration number (used for logging).
        train_gcm: Unused here; kept to match the parent class.
    """
    # Get all samples.
    samples = [
        sample for i in range(len(sample_lists))
        for sample in sample_lists[i].get_samples()
    ]

    # Split longer trajectories into shorter segments.
    if samples[0].T > self.T:
        assert samples[0].T % self.T == 0
        samples[0].agent.T = self.T  # Fake new T
        new_samples = []
        for sample in samples:
            for i in range(samples[0].T // self.T):
                new_sample = Sample(sample.agent)
                for sensor in sample._data:
                    # Split data.
                    new_sample._data[sensor] = \
                        sample._data[sensor][i * self.T:(i + 1) * self.T]
                new_samples.append(new_sample)
        samples = new_samples

    self.N = len(samples)
    print("itr", itr, "N: ", self.N, "M: ", self.M)
    assert self.min_samples_per_cluster * self.M <= self.N
    X = np.asarray([sample.get_X() for sample in samples])
    U = np.asarray([sample.get_U() for sample in samples])

    # Update global dynamics prior.
    self.dynamics_prior.update(X, U)

    # Store end effector points for visualization.
    self.eeps = [s.get(END_EFFECTOR_POINTS) for s in samples]

    # Cluster samples.
    clusterings = self.tac(samples, self.initial_clustering)
    for i in range(self.random_resets):
        clusterings.extend(self.tac(samples, 'random'))
    self.responsibilitieses = [c[0] for c in clusterings]  # Store for export.

    # Select the clustering with maximal likelihood.
    self._assign_samples(
        samples, clusterings[np.argmax([c[1] for c in clusterings])][0])
    # Fit linearizations again, but this time also using the local trajectories.
    self.m_step(for_tac=False)

    # C-step
    if self.iteration_count > 0:
        self._stepadjust()
    self._update_trajectories()

    # S-step
    self._update_policy()

    # Prepare for next iteration.
    self._advance_iteration_variables()
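# A toy sketch (plain arrays instead of Sample objects) of the splitting step
# in iteration() above: a rollout of length T_long is cut into T_long // T
# contiguous segments of length T, one new sample per segment. The sizes below
# are illustrative, not taken from the source.
import numpy as np

def split_trajectory(traj, T):
    """Split a (T_long, d) array into T_long // T segments of length T."""
    T_long = traj.shape[0]
    assert T_long % T == 0
    return [traj[i * T:(i + 1) * T] for i in range(T_long // T)]

segments = split_trajectory(np.zeros((300, 4)), 100)
assert len(segments) == 3 and all(seg.shape == (100, 4) for seg in segments)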
def _init_sample(self, condition):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
    """
    sample = Sample(self)

    # Initialize world/run kinematics.
    self._init(condition)

    # Initialize sample with stuff from _data.
    data = self._world[condition].get_data()
    sample.set(JOINT_ANGLES, data['qpos'].flatten(), t=0)
    sample.set(JOINT_VELOCITIES, data['qvel'].flatten(), t=0)
    eepts = data['site_xpos'].flatten()
    sample.set(END_EFFECTOR_POINTS, eepts, t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
    jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
    for site in range(eepts.shape[0] // 3):
        idx = site * 3
        jac[idx:(idx + 3), :] = self._world[condition].get_jac_site(site)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)

    # Save initial image to meta data.
    self._world[condition].plot(self._hyperparams['x0'][condition])
    img = self._world[condition].get_image_scaled(
        self._hyperparams['image_width'], self._hyperparams['image_height'])
    # mjcpy image shape is [height, width, channels];
    # dim-shuffle it for later conv-net processing and flatten for storage.
    img_data = np.transpose(img["img"], (1, 0, 2)).flatten()
    # If the initial image is an observation, replicate it for each time step.
    if CONTEXT_IMAGE in self.obs_data_types:
        sample.set(CONTEXT_IMAGE, np.tile(img_data, (self.T, 1)), t=None)
    else:
        sample.set(CONTEXT_IMAGE, img_data, t=None)
    sample.set(CONTEXT_IMAGE_SIZE,
               np.array([self._hyperparams['image_channels'],
                         self._hyperparams['image_width'],
                         self._hyperparams['image_height']]),
               t=None)
    # Only save subsequent images if the image is part of the observation.
    if RGB_IMAGE in self.obs_data_types:
        sample.set(RGB_IMAGE, img_data, t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    return sample
def _eval_cost(self, cond, prev_cost=False):
    """
    Evaluate costs for all samples for a condition.
    Args:
        cond: Condition to evaluate cost on.
        prev_cost: Whether or not to use previous_cost (for ioc stepadjust).
    """
    # Constants.
    T, dX, dU = self.T, self.dX, self.dU
    synN = self._hyperparams['synthetic_cost_samples']
    if synN > 0:
        agent = self.cur[cond].sample_list.get_samples()[0].agent
        X, U, _ = self._traj_samples(cond, synN)
        syn_samples = []
        for i in range(synN):
            sample = Sample(agent)
            sample.set_XU(X[i, :, :], U[i, :, :])
            syn_samples.append(sample)
        all_samples = SampleList(
            syn_samples + self.cur[cond].sample_list.get_samples())
    else:
        all_samples = self.cur[cond].sample_list
    N = len(all_samples)

    # Compute cost.
    cs = np.zeros((N, T))
    cc = np.zeros((N, T))
    cv = np.zeros((N, T, dX + dU))
    Cm = np.zeros((N, T, dX + dU, dX + dU))
    if self._hyperparams['ioc']:
        cgt = np.zeros((N, T))
    for n in range(N):
        sample = all_samples[n]
        # Get costs.
        if prev_cost:
            l, lx, lu, lxx, luu, lux = self.previous_cost[cond].eval(sample)
        else:
            l, lx, lu, lxx, luu, lux = self.cost[cond].eval(sample)
        # Compute the ground truth cost.
        if self._hyperparams['ioc'] and n >= synN:
            l_gt, _, _, _, _, _ = self.gt_cost[cond].eval(sample)
            cgt[n, :] = l_gt
        cc[n, :] = l
        cs[n, :] = l

        # Assemble matrix and vector.
        cv[n, :, :] = np.c_[lx, lu]
        Cm[n, :, :, :] = np.concatenate(
            (np.c_[lxx, np.transpose(lux, [0, 2, 1])], np.c_[lux, luu]),
            axis=1)

        # Adjust for expanding cost around a sample.
        X = sample.get_X()
        U = sample.get_U()
        yhat = np.c_[X, U]
        rdiff = -yhat
        rdiff_expand = np.expand_dims(rdiff, axis=2)
        cv_update = np.sum(Cm[n, :, :, :] * rdiff_expand, axis=1)
        cc[n, :] += np.sum(rdiff * cv[n, :, :], axis=1) + 0.5 * \
            np.sum(rdiff * cv_update, axis=1)
        cv[n, :, :] += cv_update

    # Fill in cost estimate.
    if prev_cost:
        traj_info = self.cur[cond].prevcost_traj_info
        traj_info.dynamics = self.cur[cond].traj_info.dynamics
        traj_info.x0sigma = self.cur[cond].traj_info.x0sigma
        traj_info.x0mu = self.cur[cond].traj_info.x0mu
    else:
        traj_info = self.cur[cond].traj_info
    self.cur[cond].cs = cs[synN:]      # True value of cost.
    traj_info.cc = np.mean(cc, 0)      # Constant term (scalar).
    traj_info.cv = np.mean(cv, 0)      # Linear term (vector).
    traj_info.Cm = np.mean(Cm, 0)      # Quadratic term (matrix).
    if self._hyperparams['ioc']:
        self.cur[cond].cgt = cgt[synN:]
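# A small numeric check (toy data, no GPS objects) of the re-centering step in
# _eval_cost above. Given a quadratic expansion of the cost around yhat = [x, u],
#     l(y) ~= cc + cv . (y - yhat) + 0.5 * (y - yhat)' Cm (y - yhat),
# the update with rdiff = -yhat rewrites it as an expansion around y = 0,
#     l(y) ~= cc_new + cv_new . y + 0.5 * y' Cm y,
# with cv_new = cv + Cm rdiff and cc_new = cc + rdiff.cv + 0.5 rdiff' Cm rdiff.
import numpy as np

rng = np.random.RandomState(0)
d = 5
yhat, cv = rng.randn(d), rng.randn(d)
cc = rng.randn()
A = rng.randn(d, d)
Cm = A + A.T                                  # symmetric Hessian

rdiff = -yhat
cv_new = cv + Cm.dot(rdiff)
cc_new = cc + rdiff.dot(cv) + 0.5 * rdiff.dot(Cm).dot(rdiff)

y = rng.randn(d)
lhs = cc + cv.dot(y - yhat) + 0.5 * (y - yhat).dot(Cm).dot(y - yhat)
rhs = cc_new + cv_new.dot(y) + 0.5 * y.dot(Cm).dot(y)
assert np.allclose(lhs, rhs)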
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)

    # Initialize world/run kinematics.
    q = [self._trial_arm.joint_angle(j) for j in self.joint_names]
    dq = [self._trial_arm.joint_velocity(j) for j in self.joint_names]
    pos = list(self._trial_arm.endpoint_pose()['position'])
    orn = list(self._trial_arm.endpoint_pose()['orientation'])
    dpos = list(self._trial_arm.endpoint_velocity()['linear'])
    dorn = list(self._trial_arm.endpoint_velocity()['angular'])
    jac = self._kin_trial.jacobian()

    sample.set(JOINT_ANGLES, np.asarray(q), t=0)
    sample.set(JOINT_VELOCITIES, np.asarray(dq), t=0)
    sample.set(END_EFFECTOR_POINTS, np.asarray(pos), t=0)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac[:3, :], t=0)

    img_subs = self.img_subs_list[0]
    depth_subs = self.depth_subs_list[0]
    image = img_subs.img
    depth_rescaled = self.get_depth_img(depth_subs)
    all_visual_features, all_centroids, fig = self._get_rcnn_features(
        image, depth_rescaled)
    try:
        delta_centroid = all_centroids[0] - all_centroids[1]
    except:
        delta_centroid = np.array([30, 30, 30])
    # print(all_centroids)
    # set_trace()
    # image_buffer.append(image[:, :, ::-1])
    feat_visual_1, feat_visual_2, feat_visual_max_1, feat_visual_max_2 = \
        self._apply_feature_selection(all_visual_features)
    embedding = np.concatenate([
        delta_centroid, feat_visual_1, feat_visual_max_1, feat_visual_2,
        feat_visual_max_2
    ])
    sample.set(TCN_EMBEDDING, embedding, t=0)

    if fig is not None:
        canvas = FigureCanvas(fig)
        ax = fig.gca()
        canvas.draw()  # Draw the canvas and cache the renderer.
        img = np.array(fig.canvas.renderer._renderer)
        sample.set(RCNN_OUTPUT, img, t=0)
        sample.set(RGB_IMAGE, image, t=0)
        plt.close(fig)
    else:
        sample.set(RGB_IMAGE, image, t=0)
        sample.set(RCNN_OUTPUT, np.zeros((800, 800, 4)), t=0)
    return sample
def _init_sample(self, condition):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
    """
    sample = Sample(self)
    sample.set(JOINT_ANGLES,
               self._hyperparams['x0'][condition][self._joint_idx], t=0)
    sample.set(JOINT_VELOCITIES,
               self._hyperparams['x0'][condition][self._vel_idx], t=0)
    self._data = self._world.get_data()
    eepts = self._data['site_xpos'].flatten()
    sample.set(END_EFFECTOR_POINTS, eepts, t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
    jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
    for site in range(eepts.shape[0] // 3):
        idx = site * 3
        jac[idx:(idx + 3), :] = self._world.get_jac_site(site)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
    """
    sample = Sample(self)

    ## Modified:
    #self.baxter.move_baxter_to_joint_positions([1.05, -0.01, 0.20, 0.50, 0.47, 0.80, -0.14])
    #self.baxter.move_baxter_to_joint_positions([0.27, -1.14, 0.98, 1.60, 0.15, 0.51, 0.27])  # for block_inserting task
    #self.baxter.move_baxter_to_joint_positions([0.32, -0.71, 0.68, 1.09, 0.07, 0.76, 0.13])  # for ball_punching task
    #self.baxter.move_baxter_to_joint_positions(self._hyperparams['x0'][condition][0:7])
    #self.baxter.initialize_left_arm([-0.22549517556152346, 0.36815538867187503, -1.5040681608032227, 0.5817622131408692, -0.5012282218688965, 1.8553497608276368, 0.08935438079223633])  # for block_inserting task
    self.baxter.initialize_left_arm(
        self._hyperparams['initial_left_arm'][condition])  # grasping task
    self.cnt = 0

    self.prev_positions = self.baxter.get_baxter_joint_angles_positions()
    # sample.set(JOINT_ANGLES, np.array(self.baxter.get_baxter_joint_angles_positions()), t=0)
    sample.set(JOINT_ANGLES, np.array(self.prev_positions), t=0)
    sample.set(JOINT_VELOCITIES,
               np.array(self.baxter.get_baxter_joint_angles_velocities()), t=0)
    sample.set(END_EFFECTOR_POINTS,
               np.array(self.baxter.get_baxter_end_effector_pose()), t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES,
               np.array(self.baxter.get_baxter_end_effector_velocity()), t=0)
    sample.set(END_EFFECTOR_POINT_JACOBIANS,
               np.array(self.baxter.get_baxter_end_effector_jacobian()), t=0)

    ## NEED TO ADD SENSOR 'RGB_IMAGE'
    ## NEED TO ADD 'get_baxter_camera_image()' in 'baxter_methods.py'
    if RGB_IMAGE in self.obs_data_types:
        #self.baxter.get_baxter_camera_open()
        self.img = self.baxter.get_baxter_camera_image()
        np.savez('camera_image_blind_' + str(condition) + '.npz', img=self.img)
        ## NEED TO CHECK IMAGE SHAPE
        ## NEED TO CHECK IMAGE TYPE - INT? / FLOAT?
        ## MUJOCO: [HEIGHT, WIDTH, CHANNELS] == [300, 480, 3]
        sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    if IMAGE_FEAT in self.obs_data_types:
        raise ValueError(
            'Image features should not be in observation, just state')
    if feature_fn is not None:
        obs = sample.get_obs()
        sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
    else:
        sample.set(IMAGE_FEAT,
                   np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)),
                   t=0)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)

    # Initialize world/run kinematics.
    self._init(condition)

    # Initialize sample with stuff from _data.
    data = self._world[condition].get_data()
    sample.set(JOINT_ANGLES, data['qpos'].flatten(), t=0)
    sample.set(JOINT_VELOCITIES, data['qvel'].flatten(), t=0)
    eepts = data['site_xpos'].flatten()
    sample.set(END_EFFECTOR_POINTS, eepts, t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
    if END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']:
        sample.set(END_EFFECTOR_POINTS_NO_TARGET,
                   np.delete(eepts, self._hyperparams['target_idx']), t=0)
        sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET,
                   np.delete(np.zeros_like(eepts),
                             self._hyperparams['target_idx']), t=0)
    jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
    for site in range(eepts.shape[0] // 3):
        idx = site * 3
        jac[idx:(idx + 3), :] = self._world[condition].get_jac_site(site)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)

    # Save initial image to meta data.
    self._world[condition].plot(self._hyperparams['x0'][condition])
    img = self._world[condition].get_image_scaled(
        self._hyperparams['image_width'], self._hyperparams['image_height'])
    # mjcpy image shape is [height, width, channels];
    # dim-shuffle it for later conv-net processing and flatten for storage.
    img_data = np.transpose(img["img"], (2, 1, 0)).flatten()
    # If the initial image is an observation, replicate it for each time step.
    if CONTEXT_IMAGE in self.obs_data_types:
        sample.set(CONTEXT_IMAGE, np.tile(img_data, (self.T, 1)), t=None)
    else:
        sample.set(CONTEXT_IMAGE, img_data, t=None)
    sample.set(CONTEXT_IMAGE_SIZE,
               np.array([self._hyperparams['image_channels'],
                         self._hyperparams['image_width'],
                         self._hyperparams['image_height']]),
               t=None)
    # Only save subsequent images if the image is part of the observation.
    if RGB_IMAGE in self.obs_data_types:
        sample.set(RGB_IMAGE, img_data, t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    if IMAGE_FEAT in self.obs_data_types:
        raise ValueError(
            'Image features should not be in observation, just state')
    if feature_fn is not None:
        obs = sample.get_obs()  # Assumes that the rest of the sample has been populated.
        sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
    else:
        # TODO - need a better solution than setting this to 0.
        sample.set(IMAGE_FEAT,
                   np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)),
                   t=0)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)

    # Initialize world/run kinematics.
    #self._init(condition)

    # Initialize sample with stuff from _data.
    # Get data from mj_world, condition-specific.
    data = self._world[condition].reset()
    sample.set(JOINT_ANGLES, data[0:7], t=0)  # Set _data in sample class
    sample.set(JOINT_VELOCITIES, data[7:14], t=0)
    sample.set(END_EFFECTOR_POINTS, data[14:24], t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, data[24:34], t=0)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, 0.0, t=0)
    return sample
def _init_sample(self, condition):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
    """
    sample = Sample(self)
    sample.set(JOINT_ANGLES,
               self._hyperparams['x0'][condition][self._joint_idx], t=0)
    sample.set(JOINT_VELOCITIES,
               self._hyperparams['x0'][condition][self._vel_idx], t=0)
    self._data = self._world[condition].get_data()
    eepts = self._data['site_xpos'].flatten()
    sample.set(END_EFFECTOR_POINTS, eepts, t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
    jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
    for site in range(eepts.shape[0] // 3):
        idx = site * 3
        jac[idx:(idx + 3), :] = self._world[condition].get_jac_site(site)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)

    # Save initial image to meta data.
    self._world[condition].plot(self._hyperparams['x0'][condition])
    img = self._world[condition].get_image_scaled(
        self._hyperparams['image_width'], self._hyperparams['image_height'])
    # mjcpy image shape is [height, width, channels];
    # dim-shuffle it for later conv-net processing and flatten for storage.
    img_data = np.transpose(img["img"], (2, 1, 0)).flatten()
    # If the initial image is an observation, replicate it for each time step.
    if CONTEXT_IMAGE in self.obs_data_types:
        sample.set(CONTEXT_IMAGE, np.tile(img_data, (self.T, 1)), t=None)
    else:
        sample.set(CONTEXT_IMAGE, img_data, t=None)
    sample.set(CONTEXT_IMAGE_SIZE,
               np.array([self._hyperparams['image_channels'],
                         self._hyperparams['image_width'],
                         self._hyperparams['image_height']]),
               t=None)
    # Only save subsequent images if the image is part of the observation.
    if RGB_IMAGE in self.obs_data_types:
        sample.set(RGB_IMAGE, img_data, t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)

    # Initialize world/run kinematics.
    jac = self._kin_trial.jacobian()
    X, image = self._get_current_state(t=0)
    if self.take_video:
        self.rgb_writer.append_data(image)

    # X = np.concatenate([geom_dist_ee_to_anchor, geom_dist_object2_to_anchor,
    #                     pos, dpos, gripper_binary])
    geom_dist_ee_to_anchor = X[0:3]
    geom_dist_object2_to_anchor = X[3:6]
    pos = X[6:9]
    dpos = X[9:12]
    gripper_binary = X[12]
    q = X[13:20]
    dq = X[20:27]
    emb = X[27:28]

    sample.set(OBJECT_POSE,
               np.concatenate(
                   [geom_dist_ee_to_anchor, geom_dist_object2_to_anchor]),
               t=0)
    sample.set(JOINT_ANGLES, np.asarray(q), t=0)
    sample.set(JOINT_VELOCITIES, np.asarray(dq), t=0)
    sample.set(END_EFFECTOR_POINTS, pos, t=0)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac[:3, :], t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.asarray(dpos), t=0)
    sample.set(IMAGE_FEATURE, np.asarray(emb), t=0)
    # sample.set(END_EFFECTOR_ORIENTATIONS, np.asarray(orn), t=0)
    # sample.set(END_EFFECTOR_ANGULAR_VELOCITIES, np.asarray(dorn), t=0)
    return X, sample
def sample(self, policy, condition, verbose=True, save=True, noisy=True,
           use_TfController=False, timeout=None, reset_cond=None, record=False):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        use_TfController: Whether to use the synchronous TfController.
    Returns:
        sample: A Sample object.
    """
    if noisy:
        noise = generate_noise(self.T, self.dU, self._hyperparams)
    else:
        noise = np.zeros((self.T, self.dU))

    # Get a new sample.
    sample = Sample(self)

    self.env.video_callable = lambda episode_id, record=record: record

    # Get initial state.
    self.env.seed(None if reset_cond is None else self.x0[reset_cond])
    obs = self.env.reset()
    if self._hyperparams.get('initial_step', 0) > 0:
        # Take one random step to get a slightly random initial state distribution.
        U_initial = (self.env.action_space.high - self.env.action_space.low) \
            / 12 * np.random.normal(size=self.dU) * self._hyperparams['initial_step']
        obs = self.env.step(U_initial)[0]
    self.set_states(sample, obs, 0)
    U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise)
    sample.set(ACTION, U_0, 0)
    for t in range(1, self.T):
        if not record and self.render:
            self.env.render(mode='human')  # TODO: add hyperparam.

        # Get state.
        obs, _, done, _ = self.env.step(sample.get_U(t - 1))
        self.set_states(sample, obs, t)

        # Get action.
        U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)
        sample.set(ACTION, U_t, t)

        if done and t < self.T - 1:
            raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T))
    if save:
        self._samples[condition].append(sample)
    self.active = False
    #print("X", sample.get_X())
    #print("U", sample.get_U())
    return sample
def _init_sample(self, condition, feature_fn=None):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
        feature_fn: Function to compute image features from the observation.
    """
    sample = Sample(self)
    self.indy.joint_move_to(self._hyperparams['x0'][condition][0:6])

    # Initialize sample with stuff from _data.
    # Indy: get joint positions.
    self.prev_positions = self.indy.get_joint_pos()

    ## TODO: replace the lines below with Indy functions.
    # Get Indy joint positions.
    sample.set(JOINT_ANGLES, self.prev_positions, t=0)
    # Get Indy joint velocities.
    sample.set(JOINT_VELOCITIES, self.indy.get_joint_vel(), t=0)
    # Get Indy end effector positions.
    ee_point = self.indy.get_task_pos()[:3]
    sample.set(END_EFFECTOR_POINTS, ee_point, t=0)
    # sample.set(END_EFFECTOR_POINTS, list(ee_point), t=t+1)
    # Get Indy end effector velocity.
    vel = self.indy.get_task_vel()
    ee_vel = vel[:3]
    ee_omg = vel[3:]
    sample.set(END_EFFECTOR_POINT_VELOCITIES,
               np.array(list(ee_vel) + list(ee_omg)), t=0)
    # Get Indy Jacobian.
    ### TODO: add a function that retrieves the Jacobian matrix here.
    sample.set(END_EFFECTOR_POINT_JACOBIANS, self.indy, t=0)

    ## TODO: check whether the block below is necessary or not.
    if END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']:
        sample.set(END_EFFECTOR_POINTS_NO_TARGET,
                   np.delete(eepts, self._hyperparams['target_idx']), t=0)
        sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET,
                   np.delete(np.zeros_like(eepts),
                             self._hyperparams['target_idx']), t=0)

    ## TODO: enable this again after installing the camera.
    # Only save subsequent images if the image is part of the observation.
    if RGB_IMAGE in self.obs_data_types:
        ## TODO: replace the line below with another function, e.g.:
        # ex 1:
        #   self.img = self.baxter.get_baxter_camera_image()
        #   sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t=0)
        # ex 2:
        #   sample.set(RGB_IMAGE, img_data, t=0)
        sample.set(RGB_IMAGE_SIZE,
                   [self._hyperparams['image_channels'],
                    self._hyperparams['image_width'],
                    self._hyperparams['image_height']],
                   t=None)
    if IMAGE_FEAT in self.obs_data_types:
        raise ValueError(
            'Image features should not be in observation, just state')
    if feature_fn is not None:
        obs = sample.get_obs()  # Assumes that the rest of the sample has been populated.
        sample.set(IMAGE_FEAT, feature_fn(obs), t=0)
    else:
        sample.set(IMAGE_FEAT,
                   np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)),
                   t=0)
    return sample
def _init_sample(self, condition):
    """
    Construct a new sample and fill in the first time step.
    Args:
        condition: Which condition to initialize.
    """
    sample = Sample(self)
    sample.set(JOINT_ANGLES,
               self._hyperparams['x0'][condition][self._joint_idx], t=0)
    sample.set(JOINT_VELOCITIES,
               self._hyperparams['x0'][condition][self._vel_idx], t=0)
    self._data = self._world[condition].get_data()
    eepts = self._data['site_xpos'].flatten()
    sample.set(END_EFFECTOR_POINTS, eepts, t=0)
    sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0)
    jac = np.zeros([eepts.shape[0], self._model[condition]['nq']])
    for site in range(eepts.shape[0] // 3):
        idx = site * 3
        jac[idx:(idx + 3), :] = self._world[condition].get_jac_site(site)
    sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0)
    return sample
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs):
    """
    Reset and execute a policy and collect a sample.
    Args:
        policy: A Policy object.
        condition: Which condition setup to run.
        verbose: Unused for this agent.
        save: Whether or not to store the trial into the samples.
        noisy: Whether or not to use noise during sampling.
        use_TfController: Whether to use the synchronous TfController.
    Returns:
        sample: A Sample object.
    """
    # Get a new sample.
    sample = Sample(self)

    sample_ok = False
    while not sample_ok:
        if not self.debug:
            self.reset(reset_cond)

        self.__init_opcua()

        if noisy:
            noise = generate_noise(self.T, self.dU, self._hyperparams)
        else:
            noise = np.zeros((self.T, self.dU))

        # Execute the policy over the time period [0, T].
        start = time.time()
        for t in range(self.T):
            # Read sensors and store sensor data in the sample.
            def store_sensor(sensor):
                sample.set(sensor, self.read_sensor(sensor), t)

            self.pool.map(store_sensor, self.sensors)

            # Override sensors.
            for override in self.sensor_overrides:
                if override['condition'](t):
                    sensor = override['sensor']
                    sample.set(sensor, np.copy(override['value']), t)

            print('X_%02d' % t, sample.get_X(t))

            # Get action.
            U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise)

            # Override actuators.
            for override in self.actuator_overrides:
                if override['condition'](t):
                    actuator = override['actuator']
                    U_t[self._u_data_idx[actuator]] = np.copy(override['value'])

            # Send signals.
            self.send_signals(t)

            # Perform action.
            for actuator in self._u_data_idx:
                self.write_actuator(actuator, U_t[self._u_data_idx[actuator]])
            sample.set(ACTION, U_t, t)
            print('U_%02d' % t, U_t)

            # Check if the agent is keeping up.
            sleep_time = start + (t + 1) * self.dt - time.time()
            if sleep_time < 0:
                logging.critical("Agent can't keep up. %fs behind." % sleep_time)
            elif sleep_time < self.dt / 2:
                logging.warning(
                    "Agent may not keep up (%.0f percent busy)" %
                    (((self.dt - sleep_time) / self.dt) * 100))

            # Wait for the next timestep.
            if sleep_time > 0 and not self.debug:
                time.sleep(sleep_time)

        if save:
            self._samples[condition].append(sample)
        self.active = False
        self.finalize_sample()

        sample_ok = input('Continue?') == 'y'
        if not sample_ok:
            print('Repeating')
    return sample
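# A minimal sketch of the fixed-rate loop pattern used in sample() above (no
# OPC UA or GPS objects; dt and T below are illustrative, and run_fixed_rate
# is a hypothetical helper, not from the source): each step is scheduled at
# start + (t + 1) * dt, and the loop warns when the work inside a step
# consumes most of the time budget.
import logging
import time

def run_fixed_rate(step_fn, dt=0.05, T=20):
    start = time.time()
    for t in range(T):
        step_fn(t)                                  # one control step
        sleep_time = start + (t + 1) * dt - time.time()
        if sleep_time < 0:
            logging.critical("Can't keep up: %.3fs behind.", -sleep_time)
        elif sleep_time < dt / 2:
            logging.warning("Step used %.0f%% of the budget.",
                            (dt - sleep_time) / dt * 100)
        if sleep_time > 0:
            time.sleep(sleep_time)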
def _eval_cost(self, cond, prev_cost=False):
    """
    Evaluate costs for all samples for a condition.
    Args:
        cond: Condition to evaluate cost on.
        prev_cost: Whether or not to use previous_cost (for ioc stepadjust).
    """
    # Constants.
    T, dX, dU = self.T, self.dX, self.dU
    synN = self._hyperparams['synthetic_cost_samples']
    if synN > 0:
        agent = self.cur[cond].sample_list.get_samples()[0].agent
        X, U, _ = self._traj_samples(cond, synN)
        syn_samples = []
        for i in range(synN):
            sample = Sample(agent)
            sample.set_XU(X[i, :, :], U[i, :, :])
            syn_samples.append(sample)
        all_samples = SampleList(
            syn_samples + self.cur[cond].sample_list.get_samples())
    else:
        all_samples = self.cur[cond].sample_list
    N = len(all_samples)

    # Compute cost.
    cs = np.zeros((N, T))
    cc = np.zeros((N, T))
    cv = np.zeros((N, T, dX + dU))
    Cm = np.zeros((N, T, dX + dU, dX + dU))
    if self._hyperparams['ioc']:
        cgt = np.zeros((N, T))
    for n in range(N):
        sample = all_samples[n]
        # Get costs.
        if prev_cost:
            l, lx, lu, lxx, luu, lux = self.previous_cost[cond].eval(sample)
        else:
            l, lx, lu, lxx, luu, lux = self.cost[cond].eval(sample)
        # Compute the ground truth cost.
        if self._hyperparams['ioc'] and n >= synN:
            l_gt, _, _, _, _, _ = self.gt_cost[cond].eval(sample)
            cgt[n, :] = l_gt
        cc[n, :] = l
        cs[n, :] = l

        # Assemble matrix and vector.
        cv[n, :, :] = np.c_[lx, lu]
        Cm[n, :, :, :] = np.concatenate(
            (np.c_[lxx, np.transpose(lux, [0, 2, 1])], np.c_[lux, luu]),
            axis=1)

        # Adjust for expanding cost around a sample.
        X = sample.get_X()
        U = sample.get_U()
        yhat = np.c_[X, U]
        rdiff = -yhat
        rdiff_expand = np.expand_dims(rdiff, axis=2)
        cv_update = np.sum(Cm[n, :, :, :] * rdiff_expand, axis=1)
        cc[n, :] += np.sum(rdiff * cv[n, :, :], axis=1) + 0.5 * \
            np.sum(rdiff * cv_update, axis=1)
        cv[n, :, :] += cv_update

    # Fill in cost estimate.
    if prev_cost:
        traj_info = self.cur[cond].prevcost_traj_info
        traj_info.dynamics = self.cur[cond].traj_info.dynamics
        traj_info.x0sigma = self.cur[cond].traj_info.x0sigma
        traj_info.x0mu = self.cur[cond].traj_info.x0mu
    else:
        traj_info = self.cur[cond].traj_info
    self.cur[cond].cs = cs[synN:]      # True value of cost.
    traj_info.cc = np.mean(cc, 0)      # Constant term (scalar).
    traj_info.cv = np.mean(cv, 0)      # Linear term (vector).
    traj_info.Cm = np.mean(Cm, 0)      # Quadratic term (matrix).
    if self._hyperparams['ioc']:
        self.cur[cond].cgt = cgt[synN:]