def step(self, action):
    # The incoming "action" is a latent command z for the predictive model,
    # not a raw robot action.
    z = action
    obs = self.base_env.get_observation()
    obs = obs["image"].reshape([self.img_dim, self.img_dim, 3]) * 255
    real_action, log_prob = self.predictive_model.predict(
        self.past[-self.past_length:], obs, z)
    total_reward = 0
    for i in range(self.num_execution_per_step):
        first_predicted_action = real_action[0, 0, 0, i]
        # NOTE: this unpacking rebinds z (now the ee z-position); the latent z
        # is only used in the predict() call above, so behavior is unaffected.
        x, y, z, theta = first_predicted_action[:4]
        a = np.array([x, y, z, theta])
        a = (a * stddev) + mean  # un-normalize with module-level statistics
        obs, reward, done, info = self.base_env.step(a)
        info["log_prob"] = log_prob
        total_reward += reward
        # Append the new ee pose (x, y, z, yaw) to the history buffer.
        quat = obs["state"][3:7]
        theta = bullet.quat_to_deg(quat)[2]
        state = np.array(
            [obs["state"][0], obs["state"][1], obs["state"][2], theta]
        ).reshape([1, self.state_dim])
        self.past = np.concatenate([self.past, state], axis=0)
    return obs, total_reward, done, info
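# --- Illustrative rollout sketch (not from the original source) ---
# Shows how a caller might drive the step() above: the "action" it receives is
# a latent command z consumed by predictive_model.predict(), not a raw robot
# action. The wrapper name `PredictiveModelEnv`, its constructor arguments, and
# `latent_dim` are hypothetical; only the step() interface comes from the code.
#
#   env = PredictiveModelEnv(base_env=..., predictive_model=...)
#   z = np.random.randn(latent_dim)
#   obs, total_reward, done, info = env.step(z)
#   print(info["log_prob"], total_reward)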
def get_vr_output():
    nonlocal prev_vr_theta
    ee_pos, ee_theta = bullet.get_link_state(
        env.robot_id, env.end_effector_index)
    events = p.getVREvents()  # detect input from controllers
    assert events, "no input from controller!"
    e = events[0]

    # obtain gripper state from controller trigger
    trigger = get_gripper_input(e)

    # pass controller position and orientation into the environment
    cont_pos = e[POSITION]
    cont_orient = bullet.deg_to_quat([180, 0, 0])
    if ORIENTATION_ENABLED:
        cont_orient = e[ORIENTATION]
        cont_orient = bullet.quat_to_deg(list(cont_orient))
    # NOTE: if ORIENTATION_ENABLED is False, cont_orient stays a quaternion, so
    # cont_orient[2] below is a quaternion component rather than yaw in degrees.

    action = [
        cont_pos[0] - ee_pos[0],
        cont_pos[1] - ee_pos[1],
        cont_pos[2] - ee_pos[2],
    ]
    # scale position deltas to make grasp success take < 20 timesteps
    action = np.array(action) * 3.5
    grip = trigger
    for _ in range(2):
        action = np.append(action, 0)  # placeholder roll/pitch entries
    wrist_theta = cont_orient[2] - prev_vr_theta
    action = np.append(action, wrist_theta)
    action = np.append(action, grip)
    action = np.append(action, 0)  # neutral-action slot, kept at 0 during teleop

    # ===========================================================
    # Add noise during actual data collection
    noise = 0.1
    noise_scalings = [noise] * 3 + [0.1 * noise] * 3 + [noise] * 2
    action += np.random.normal(scale=noise_scalings)
    # ===========================================================

    action = np.clip(action, -1 + EPSILON, 1 - EPSILON)
    prev_vr_theta = cont_orient[2]
    return action
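# --- Context sketch (not from the original source) ---
# get_vr_output() declares `nonlocal prev_vr_theta`, so it must be nested in an
# enclosing collection function that owns that state. The enclosing structure
# below (`collect_episode`, `num_timesteps`) is an assumption; only
# get_vr_output() itself comes from the source.
#
#   def collect_episode(env, num_timesteps):
#       prev_vr_theta = 0.0
#       def get_vr_output():
#           ...  # body as above
#       for _ in range(num_timesteps):
#           obs, reward, done, info = env.step(get_vr_output())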
def step(self, action):
    # TODO Clean this up
    if np.isnan(np.sum(action)):
        print('action', action)
        raise RuntimeError('Action has NaN entries')

    action = np.clip(action, -1, +1)  # TODO Clean this up

    xyz_action = action[:3]  # ee position actions
    abc_action = action[3:6]  # ee orientation actions
    gripper_action = action[6]
    neutral_action = action[7]

    ee_pos, ee_quat = bullet.get_link_state(
        self.robot_id, self.end_effector_index)
    joint_states, _ = bullet.get_joint_states(self.robot_id,
                                              self.movable_joints)
    gripper_state = np.asarray([joint_states[-2], joint_states[-1]])

    target_ee_pos = ee_pos + self.xyz_action_scale * xyz_action
    ee_deg = bullet.quat_to_deg(ee_quat)
    target_ee_deg = ee_deg + self.abc_action_scale * abc_action
    target_ee_quat = bullet.deg_to_quat(target_ee_deg)

    if self.control_mode == 'continuous':
        num_sim_steps = self.num_sim_steps
        target_gripper_state = gripper_state + \
            [-self.gripper_action_scale * gripper_action,
             self.gripper_action_scale * gripper_action]
    elif self.control_mode == 'discrete_gripper':
        if gripper_action > 0.5 and not self.is_gripper_open:
            num_sim_steps = self.num_sim_steps_discrete_action
            target_gripper_state = GRIPPER_OPEN_STATE
            self.is_gripper_open = True  # TODO(avi): Clean this up
        elif gripper_action < -0.5 and self.is_gripper_open:
            num_sim_steps = self.num_sim_steps_discrete_action
            target_gripper_state = GRIPPER_CLOSED_STATE
            self.is_gripper_open = False  # TODO(avi): Clean this up
        else:
            num_sim_steps = self.num_sim_steps
            if self.is_gripper_open:
                target_gripper_state = GRIPPER_OPEN_STATE
            else:
                target_gripper_state = GRIPPER_CLOSED_STATE
            # target_gripper_state = gripper_state
    else:
        raise NotImplementedError

    target_ee_pos = np.clip(target_ee_pos, self.ee_pos_low, self.ee_pos_high)
    target_gripper_state = np.clip(target_gripper_state, GRIPPER_LIMITS_LOW,
                                   GRIPPER_LIMITS_HIGH)

    bullet.apply_action_ik(
        target_ee_pos, target_ee_quat, target_gripper_state,
        self.robot_id, self.end_effector_index, self.movable_joints,
        lower_limit=JOINT_LIMIT_LOWER,
        upper_limit=JOINT_LIMIT_UPPER,
        rest_pose=RESET_JOINT_VALUES,
        joint_range=JOINT_RANGE,
        num_sim_steps=num_sim_steps)

    if self.use_neutral_action and neutral_action > 0.5:
        if self.neutral_gripper_open:
            bullet.move_to_neutral(
                self.robot_id, self.reset_joint_indices, RESET_JOINT_VALUES)
        else:
            bullet.move_to_neutral(
                self.robot_id, self.reset_joint_indices,
                RESET_JOINT_VALUES_GRIPPER_CLOSED)

    info = self.get_info()
    reward = self.get_reward(info)
    done = False
    return self.get_observation(), reward, done, info
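# --- Example action (illustrative, not from the original source) ---
# The 8-dim layout follows the slicing in the step() above: indices 0-2 are
# end-effector position deltas, 3-5 are orientation (roll, pitch, yaw) deltas,
# 6 is the gripper command, and 7 the neutral-reset command. `env` is assumed
# to be an instance of this class in 'discrete_gripper' mode.
import numpy as np

action = np.zeros(8)
action[:3] = [0.0, 0.0, 0.5]  # move the end effector up
action[6] = 1.0               # > 0.5 opens the gripper in 'discrete_gripper' mode
obs, reward, done, info = env.step(action)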