def _init_sample(self, b2d_X): """ Construct a new sample and fill in the first time step. """ sample = Sample(self) self._set_sample(sample, b2d_X, -1) feature_fn = None if RGB_IMAGE in self.obs_data_types: ## TODO : replace below line with other function # ex 1: # self.img = self.baxter.get_baxter_camera_image() # sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t = 0) # ex 2: # sample.set(RGB_IMAGE, img_data, t=0) sample.set(RGB_IMAGE_SIZE, [ self._hyperparams['image_channels'], self._hyperparams['image_width'], self._hyperparams['image_height'] ], t=None) if IMAGE_FEAT in self.obs_data_types: raise ValueError( 'Image features should not be in observation, just state') if feature_fn is not None: obs = sample.get_obs( ) # Assumes that the rest of the sample has been populated sample.set(IMAGE_FEAT, feature_fn(obs), t=0) else: sample.set( IMAGE_FEAT, np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT], )), t=0) return sample
def _init_sample(self, condition, feature_fn=None): """ Construct a new sample and fill in the first time step. Args: condition: Which condition to initialize. feature_fn: funciton to comptue image features from the observation. """ sample = Sample(self) # Initialize world/run kinematics self._init(condition) # Initialize sample with stuff from _data data = self._world[condition].get_data() sample.set(JOINT_ANGLES, data['qpos'].flatten(), t=0) sample.set(JOINT_VELOCITIES, data['qvel'].flatten(), t=0) eepts = data['site_xpos'].flatten() sample.set(END_EFFECTOR_POINTS, eepts, t=0) sample.set(END_EFFECTOR_POINT_VELOCITIES, np.zeros_like(eepts), t=0) if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(eepts, self._hyperparams['target_idx']), t=0) sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(np.zeros_like(eepts), self._hyperparams['target_idx']), t=0) jac = np.zeros([eepts.shape[0], self._model[condition]['nq']]) for site in range(eepts.shape[0] // 3): idx = site * 3 jac[idx:(idx+3), :] = self._world[condition].get_jac_site(site) sample.set(END_EFFECTOR_POINT_JACOBIANS, jac, t=0) # save initial image to meta data self._world[condition].plot(self._hyperparams['x0'][condition]) img = self._world[condition].get_image_scaled(self._hyperparams['image_width'], self._hyperparams['image_height']) # mjcpy image shape is [height, width, channels], # dim-shuffle it for later conv-net processing, # and flatten for storage img_data = np.transpose(img["img"], (2, 1, 0)).flatten() # if initial image is an observation, replicate it for each time step if CONTEXT_IMAGE in self.obs_data_types: sample.set(CONTEXT_IMAGE, np.tile(img_data, (self.T, 1)), t=None) else: sample.set(CONTEXT_IMAGE, img_data, t=None) sample.set(CONTEXT_IMAGE_SIZE, np.array([self._hyperparams['image_channels'], self._hyperparams['image_width'], self._hyperparams['image_height']]), t=None) # only save subsequent images if image is part of observation if RGB_IMAGE in self.obs_data_types: sample.set(RGB_IMAGE, img_data, t=0) sample.set(RGB_IMAGE_SIZE, [self._hyperparams['image_channels'], self._hyperparams['image_width'], self._hyperparams['image_height']], t=None) if IMAGE_FEAT in self.obs_data_types: raise ValueError('Image features should not be in observation, just state') if feature_fn is not None: obs = sample.get_obs() # Assumes that the rest of the sample has been populated sample.set(IMAGE_FEAT, feature_fn(obs), t=0) else: # TODO - need better solution than setting this to 0. sample.set(IMAGE_FEAT, np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)), t=0) return sample
def sample( self, policy, condition, verbose=True, save=True, noisy=True, use_TfController=False, timeout=None, reset_cond=None, record=False ): """ Reset and execute a policy and collect a sample. Args: policy: A Policy object. condition: Which condition setup to run. verbose: Unused for this agent. save: Whether or not to store the trial into the samples. noisy: Whether or not to use noise during sampling. use_TfController: Whether to use the syncronous TfController Returns: sample: A Sample object. """ if noisy: noise = generate_noise(self.T, self.dU, self._hyperparams) else: noise = np.zeros((self.T, self.dU)) # Get a new sample sample = Sample(self) self.env.video_callable = lambda episode_id, record=record: record # Get initial state self.env.seed(None if reset_cond is None else self.x0[reset_cond]) obs = self.env.reset() if self._hyperparams.get('initial_step', 0) > 0: # Take one random step to get a slightly random initial state distribution U_initial = (self.env.action_space.high - self.env.action_space.low ) / 12 * np.random.normal(size=self.dU) * self._hyperparams['initial_step'] obs = self.env.step(U_initial)[0] self.set_states(sample, obs, 0) U_0 = policy.act(sample.get_X(0), sample.get_obs(0), 0, noise) sample.set(ACTION, U_0, 0) for t in range(1, self.T): if not record and self.render: self.env.render(mode='human') # TODO add hyperparam # Get state obs, _, done, _ = self.env.step(sample.get_U(t - 1)) self.set_states(sample, obs, t) # Get action U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise) sample.set(ACTION, U_t, t) if done and t < self.T - 1: raise Exception('Iteration ended prematurely %d/%d' % (t + 1, self.T)) if save: self._samples[condition].append(sample) self.active = False #print("X", sample.get_X()) #print("U", sample.get_U()) return sample
def _init_sample(self, condition, feature_fn=None): """ Construct a new sample and fill in the first time step. Args: condition: Which condition to initialize. feature_fn: funciton to comptue image features from the observation. """ sample = Sample(self) self.indy.joint_move_to(self._hyperparams[‘x0’][condition][0:6]) # Initialize sample with stuff from _data # indy : get joint positions self.prev_positions = self.indy.get_joint_pos() ## TODO : replace below line with indy function # get indy joint positions sample.set(JOINT_ANGLES, self.prev_positions, t=0) # get indy joint velocities sample.set(JOINT_VELOCITIES, self.indy.get_joint_vel(), t=0) # get indy end effector positions ee_point = self.indy.get_task_pos()[:3] sample.set(END_EFFECTOR_POINTS, ee_point, t=0) # sample.set(END_EFFECTOR_POINTS, list(ee_point), t=t+1) # get indy end effector velocity vel = self.indy.get_task_vel() ee_vel = vel[:3] ee_omg = vel[3:] sample.set(END_EFFECTOR_POINT_VELOCITIES, np.array(list(ee_vel) + list(ee_omg)), t=0) # get indy jacobian ### please add a function that retreive jacobian matrix here. sample.set(END_EFFECTOR_POINT_JACOBIANS, self.indy, t=0) ## TODO : check whether below line is neccessary or not. if (END_EFFECTOR_POINTS_NO_TARGET in self._hyperparams['obs_include']): sample.set(END_EFFECTOR_POINTS_NO_TARGET, np.delete(eepts, self._hyperparams['target_idx']), t=0) sample.set(END_EFFECTOR_POINT_VELOCITIES_NO_TARGET, np.delete(np.zeros_like(eepts), self._hyperparams['target_idx']), t=0) ## TODO : enable this again when after install camera # only save subsequent images if image is part of observation if RGB_IMAGE in self.obs_data_types: ## TODO : replace below line with other function # ex 1: # self.img = self.baxter.get_baxter_camera_image() # sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t = 0) # ex 2: # sample.set(RGB_IMAGE, img_data, t=0) sample.set(RGB_IMAGE_SIZE, [self._hyperparams['image_channels'], self._hyperparams['image_width'], self._hyperparams['image_height']], t=None) if IMAGE_FEAT in self.obs_data_types: raise ValueError('Image features should not be in observation, just state') if feature_fn is not None: obs = sample.get_obs() # Assumes that the rest of the sample has been populated sample.set(IMAGE_FEAT, feature_fn(obs), t=0) else: sample.set(IMAGE_FEAT, np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT],)), t=0) return sample
def _init_sample(self, condition, feature_fn=None): """ Construct a new sample and fill in the first time step. Args: condition: Which condition to initialize. """ sample = Sample(self) ## modified #self.baxter.move_baxter_to_joint_positions([1.05, -0.01, 0.20, 0.50, 0.47, 0.80, -0.14]) #self.baxter.move_baxter_to_joint_positions([0.27, -1.14, 0.98, 1.60, 0.15, 0.51, 0.27]) # for block_inserting task #self.baxter.move_baxter_to_joint_positions([0.32, -0.71, 0.68, 1.09, 0.07, 0.76, 0.13]) # for ball_punching task #self.baxter.move_baxter_to_joint_positions(self._hyperparams['x0'][condition][0:7]) #self.baxter.initialize_left_arm([-0.22549517556152346, 0.36815538867187503, -1.5040681608032227, 0.5817622131408692, -0.5012282218688965, 1.8553497608276368, 0.08935438079223633]) # for block_inserting task self.baxter.initialize_left_arm( self._hyperparams['initial_left_arm'][condition]) # grasping task self.cnt = 0 self.prev_positions = self.baxter.get_baxter_joint_angles_positions() # sample.set(JOINT_ANGLES, np.array(self.baxter.get_baxter_joint_angles_positions()), t=0) sample.set(JOINT_ANGLES, np.array(self.prev_positions), t=0) sample.set(JOINT_VELOCITIES, np.array(self.baxter.get_baxter_joint_angles_velocities()), t=0) sample.set(END_EFFECTOR_POINTS, np.array(self.baxter.get_baxter_end_effector_pose()), t=0) sample.set(END_EFFECTOR_POINT_VELOCITIES, np.array(self.baxter.get_baxter_end_effector_velocity()), t=0) sample.set(END_EFFECTOR_POINT_JACOBIANS, np.array(self.baxter.get_baxter_end_effector_jacobian()), t=0) ## NEED TO ADD SENSOR 'RGB_IMAGE' ## NEED TO ADD 'get_baxter_camera_image()' in 'baxter_methods.py' if RGB_IMAGE in self.obs_data_types: #self.baxter.get_baxter_camera_open() self.img = self.baxter.get_baxter_camera_image() np.savez('camera_image_blind_' + str(condition) + '.npz', img=self.img) ## NEED TO CHECK IMAGE SHAPE ## NEED TO CHECK IMAGE TYPE - INT? / FLOAT? ## MUJOCO: [HEIGHT, WIDTH, CHANNELS] == [300, 480, 3] sample.set(RGB_IMAGE, np.transpose(self.img, (2, 1, 0)).flatten(), t=0) sample.set(RGB_IMAGE_SIZE, [ self._hyperparams['image_channels'], self._hyperparams['image_width'], self._hyperparams['image_height'] ], t=None) if IMAGE_FEAT in self.obs_data_types: raise ValueError( 'Image features should not be in observation, just state') if feature_fn is not None: obs = sample.get_obs() sample.set(IMAGE_FEAT, feature_fn(obs), t=0) else: sample.set( IMAGE_FEAT, np.zeros((self._hyperparams['sensor_dims'][IMAGE_FEAT], )), t=0) return sample
def sample(self, policy, condition, save=True, noisy=True, reset_cond=None, **kwargs): """ Reset and execute a policy and collect a sample. Args: policy: A Policy object. condition: Which condition setup to run. verbose: Unused for this agent. save: Whether or not to store the trial into the samples. noisy: Whether or not to use noise during sampling. use_TfController: Whether to use the syncronous TfController Returns: sample: A Sample object. """ # Get a new sample sample = Sample(self) sample_ok = False while not sample_ok: if not self.debug: self.reset(reset_cond) self.__init_opcua() if noisy: noise = generate_noise(self.T, self.dU, self._hyperparams) else: noise = np.zeros((self.T, self.dU)) # Execute policy over a time period of [0,T] start = time.time() for t in range(self.T): # Read sensors and store sensor data in sample def store_sensor(sensor): sample.set(sensor, self.read_sensor(sensor), t) self.pool.map(store_sensor, self.sensors) # Override sensors for override in self.sensor_overrides: if override['condition'](t): sensor = override['sensor'] sample.set(sensor, np.copy(override['value']), t) print('X_%02d' % t, sample.get_X(t)) # Get action U_t = policy.act(sample.get_X(t), sample.get_obs(t), t, noise) # Override actuators for override in self.actuator_overrides: if override['condition'](t): actuator = override['actuator'] U_t[self._u_data_idx[actuator]] = np.copy(override['value']) # Send signals self.send_signals(t) # Perform action for actuator in self._u_data_idx: self.write_actuator(actuator, U_t[self._u_data_idx[actuator]]) sample.set(ACTION, U_t, t) print('U_%02d' % t, U_t) # Check if agent is keeping up sleep_time = start + (t + 1) * self.dt - time.time() if sleep_time < 0: logging.critical("Agent can't keep up. %fs bedind." % sleep_time) elif sleep_time < self.dt / 2: logging.warning( "Agent may not keep up (%.0f percent busy)" % (((self.dt - sleep_time) / self.dt) * 100) ) # Wait for next timestep if sleep_time > 0 and not self.debug: time.sleep(sleep_time) if save: self._samples[condition].append(sample) self.active = False self.finalize_sample() sample_ok = input('Continue?') == 'y' if not sample_ok: print('Repeating') return sample