def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # initial position of end-effector self.ee_initial_pos = self._eef_xpos # create trajectory self.trajectory = self.get_trajectory() # initialize trajectory step self.initial_traj_step = 0#np.random.default_rng().uniform(low=0, high=self.num_waypoints - 1) self.traj_step = self.initial_traj_step # step at which to evaluate trajectory. Must be in interval [0, num_waypoints - 1] # set first trajectory point self.traj_pt = self.trajectory.eval(self.traj_step) self.traj_pt_vel = self.trajectory.deriv(self.traj_step) # give controller access to robot (and its measurements) if self.robots[0].controller.name == "HMFC": self.robots[0].controller.set_robot(self.robots[0]) # initialize controller's trajectory self.robots[0].controller.traj_pos = self.traj_pt self.robots[0].controller.traj_ori = T.quat2axisangle(self.goal_quat) # get initial joint positions for robot init_qpos = self._get_initial_qpos() # override initial robot joint positions self.robots[0].set_robot_joint_positions(init_qpos) # update controller with new initial joints self.robots[0].controller.update_initial_joints(init_qpos) # initialize data collection if self.save_data: # simulation data self.data_ee_pos = np.array(np.zeros((self.horizon, 2))) self.data_ee_goal_pos = np.array(np.zeros((self.horizon, 2))) self.data_ee_force = np.array(np.zeros(self.horizon)) self.data_ee_force_mean = np.array(np.zeros(self.horizon)) self.data_ee_goal_force = np.array(np.zeros(self.horizon)) self.data_ee_z_pos = np.array(np.zeros(self.horizon)) self.data_desired_torque = np.array(np.zeros((self.horizon, self.robots[0].dof))) self.data_external_torque = np.array(np.zeros((self.horizon, self.robots[0].dof))) self.data_compensation_torque = np.array(np.zeros((self.horizon, self.robots[0].dof))) self.data_time = np.array(np.zeros(self.horizon))
def forward(self, prediction, truth): """ Computes position distance + orientation distance (squared) between prediction and orientation if self.position_only is set to False, else only computes position distance. Assumes prediction and truth are of the same size Args: prediction (torch.Tensor): Prediction of estimated pose (*, 7) of form (x,y,z,i,j,k,w) Note: Assumes UNNORMALIZED quat part truth (torch.Tensor): Ground truth state of pose, of shape (*, 7) of form (x,y,z,i,j,k,w) Note: Assumes NORMALIZED quat part with POSITIVE w value Returns: distance (torch.Tensor): (Summed) distances across all inputs. Shape = (1,) """ # First, split tensor into separate parts predict_pos, predict_ori = torch.split(prediction, (3, 4), dim=-1) true_pos, true_ori = torch.split(truth, (3, 4), dim=-1) # Normalize the predicted ori quat_mag = torch.sqrt( torch.sum(predict_ori.pow(2), dim=-1, keepdim=True)) # Shape (*,1) predict_ori = predict_ori / quat_mag # Compute position loss based on distance metric specified if self.distance_metric == "l2": # Compute pos error (cartesian distance) summed_pos_sq = torch.sum((predict_pos - true_pos).pow(2), dim=-1, keepdim=False) # Shape (*) pos_dist = torch.sum(torch.sqrt(summed_pos_sq + self.epsilon)) elif self.distance_metric == "l1" or self.distance_metric == "linf": # Compute absolute values first dist_pos_abs = torch.abs(predict_pos - true_pos) if self.distance_metric == "l1": pos_dist = torch.sum(dist_pos_abs) else: # linf case pos_dist = torch.sum(torch.max(dist_pos_abs, dim=-1)[0]) else: #combined case # Compute l2 loss summed_pos_sq = torch.sum((predict_pos - true_pos).pow(2), dim=-1, keepdim=False) # Shape (*) l2_pos_dist = torch.sum(torch.sqrt(summed_pos_sq + self.epsilon)) # Compute l1 and linf losses dist_pos_abs = torch.abs(predict_pos - true_pos) l1_pos_dist = torch.sum(dist_pos_abs) linf_pos_dist = torch.sum(torch.max(dist_pos_abs, dim=-1)[0]) # Combine distances pos_dist = l2_pos_dist + l1_pos_dist + linf_pos_dist # Additionally compute orientation error if requested if self.mode == "val": # We directly compute the summed absolute angle error in radians # Note: we don't care about torch-compatible computations since we're not backpropping in this case # Convert torch tensors to numpy arrays predict_ori = predict_ori.view(-1, 4).cpu().detach().numpy() true_ori = true_ori.view(-1, 4).cpu().detach().numpy() # Create var to hold summed orientation error summed_angle_err = 0 # Loop over all quats and compute angle error for i in range(predict_ori.shape[0]): _, angle = quat2axisangle( quat_distance(predict_ori[i], true_ori[i])) # Confine the error to be between +/- pi if angle < -np.pi: angle += 2 * np.pi elif angle > np.pi: angle -= 2 * np.pi summed_angle_err += abs(angle) # Immediately return the position and orientation errors return pos_dist.cpu().detach().numpy(), summed_angle_err elif self.mode == "pose": # Compute ori error (quat distance), roughly d(q1,q2) = 1 - <q1,q2>^2 where <> is the inner product # see https://math.stackexchange.com/questions/90081/quaternion-distance inner_product = torch.sum(predict_ori * true_ori, dim=-1, keepdim=False) # Shape (*) ori_dist_squared = torch.sum(1 - inner_product.pow(2)) # We also add in penalty if w component is negative # see https://dspace.mit.edu/bitstream/handle/1721.1/119699/1078151125-MIT.pdf?sequence=1 w_penalty = torch.clamp(-predict_ori[..., -1], min=0) ori_dist_squared += torch.sum(w_penalty) else: # case "position" # No orientation loss used ori_dist_squared = 0 # Return summed dist return self.scale_factor * (pos_dist + self.alpha * ori_dist_squared)
def input2action(device, robot, active_arm="right", env_configuration=None): """ Converts an input from an active device into a valid action sequence that can be fed into an env.step() call If a reset is triggered from the device, immediately returns None. Else, returns the appropriate action Args: device (Device): A device from which user inputs can be converted into actions. Can be either a Spacemouse or Keyboard device class robot (Robot): Which robot we're controlling active_arm (str): Only applicable for multi-armed setups (e.g.: multi-arm environments or bimanual robots). Allows inputs to be converted correctly if the control type (e.g.: IK) is dependent on arm choice. Choices are {right, left} env_configuration (str or None): Only applicable for multi-armed environments. Allows inputs to be converted correctly if the control type (e.g.: IK) is dependent on the environment setup. Options are: {bimanual, single-arm-parallel, single-arm-opposed} Returns: 2-tuple: - (None or np.array): Action interpreted from @device including any gripper action(s). None if we get a reset signal from the device - (None or int): 1 if desired close, -1 if desired open gripper state. None if get a reset signal from the device """ state = device.get_controller_state() # Note: Devices output rotation with x and z flipped to account for robots starting with gripper facing down # Also note that the outputted rotation is an absolute rotation, while outputted dpos is delta pos # Raw delta rotations from neutral user input is captured in raw_drotation (roll, pitch, yaw) dpos, rotation, raw_drotation, grasp, reset = ( state["dpos"], state["rotation"], state["raw_drotation"], state["grasp"], state["reset"], ) # If we're resetting, immediately return None if reset: return None, None # Get controller reference controller = robot.controller if not isinstance( robot, Bimanual) else robot.controller[active_arm] gripper_dof = robot.gripper.dof if not isinstance( robot, Bimanual) else robot.gripper[active_arm].dof # First process the raw drotation drotation = raw_drotation[[1, 0, 2]] if controller.name == 'IK_POSE': # If this is panda, want to swap x and y axis if isinstance(robot.robot_model, Panda): drotation = drotation[[1, 0, 2]] else: # Flip x drotation[0] = -drotation[0] # Scale rotation for teleoperation (tuned for IK) drotation *= 10 dpos *= 5 # relative rotation of desired from current eef orientation # map to quat drotation = T.mat2quat(T.euler2mat(drotation)) # If we're using a non-forward facing configuration, need to adjust relative position / orientation if env_configuration == "single-arm-opposed": # Swap x and y for pos and flip x,y signs for ori dpos = dpos[[1, 0, 2]] drotation[0] = -drotation[0] drotation[1] = -drotation[1] if active_arm == "left": # x pos needs to be flipped dpos[0] = -dpos[0] else: # y pos needs to be flipped dpos[1] = -dpos[1] # Lastly, map to axis angle form drotation = T.quat2axisangle(drotation) elif controller.name == 'OSC_POSE': # Flip z drotation[2] = -drotation[2] # Scale rotation for teleoperation (tuned for OSC) -- gains tuned for each device drotation = drotation * 1.5 if isinstance(device, Keyboard) else drotation * 50 dpos = dpos * 75 if isinstance(device, Keyboard) else dpos * 125 else: # No other controllers currently supported print( "Error: Unsupported controller specified -- Robot must have either an IK or OSC-based controller!" ) # map 0 to -1 (open) and map 1 to 1 (closed) grasp = 1 if grasp else -1 # Create action based on action space of individual robot action = np.concatenate([dpos, drotation, [grasp] * gripper_dof]) # Return the action and grasp return action, grasp
def controller_action(self, obs: dict, take_action: bool = True, DEBUG: bool = False): observation = obs['observation'] goal_pos = obs['desired_goal'] achieved_goal = obs['achieved_goal'] gripper_pos = observation[:3] gripper_quat = observation[3:7] object_pos = observation[7:10] object_quat = observation[10:] z_table = 0.8610982 object_axang = T.quat2axisangle(object_quat) if abs(object_axang[-1] - 1.24) < 0.2: object_axang_touse = [ 0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8) ] else: object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)] gripper_axang = T.quat2axisangle(gripper_quat) # print('object axang to use ' + str(object_axang_touse)) if self.gripper_reoriented == 0: self.gripper_init_quat = gripper_quat self.gripper_reoriented = 1 init_inv = T.quat_inverse(self.gripper_init_quat) changing_wf = T.quat_multiply(init_inv, gripper_quat) changing_wf_axang = T.quat2axisangle(changing_wf) # gripper_quat_inv = T.quat_inverse(gripper_quat) # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat) # changing_wf_axang = T.quat2axisangle(changing_wf) # print('changing wf axis ' +str(changing_wf_axang)) if not self.object_below_hand or self.gripper_reoriented < 5: self.nut_p = T.quat2axisangle(object_quat)[-1] # print(self.nut_p) if not self.object_below_hand: action = 20 * (object_pos[:2] - gripper_pos[:2]) else: action = [0, 0] action = 20 * (object_pos[:2] - gripper_pos[:2]) # frac = 0.2 # Rate @ which to rotate gripper about z. # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z. # if self.gripper_reoriented == 0: # self.gripper_init_quat = gripper_quat # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps # action_angle= [0,0,ang_goal] # #print('object ' + str(object_axang)) # #print('gripper ' + str(gripper_axang)) # init_inv = T.quat_inverse(self.gripper_init_quat) # init_inv_mat = T.quat2mat(init_inv) # rel = T.quat_multiply(gripper_quat,init_inv) # rel_axang = T.quat2axisangle(rel) # #print('rel_axang ' +str(rel_axang)) # rel_axang_WF = np.matmul(init_inv_mat,rel_axang) # #print('rel_axang_WF ' +str(rel_axang_WF)) # if take_action: # self.gripper_reoriented+=1 # else: # After 5 action steps, don't rotate gripper any more # action_angle=[0,0,0] action_angle = 0.2 * (object_axang_touse - changing_wf_axang) action_angle = [0, 0, action_angle[-1]] #action_angle = [0,0,0] #print('action angle ' +str(action_angle)) if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1: if take_action: self.gripper_reoriented = 5 action = np.hstack((action, [0], action_angle, [-1])) if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01: if take_action: self.object_below_hand = True #self.gripper_reoriented = 5 elif not self.object_in_hand: # Close gripper action = [0, 0, -1, 0, 0, 0, -1] if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01: action = [0, 0, 0, 0, 0, 0, 1] if take_action: self.object_in_hand = True else: # Move gripper up and toward goal position action = [0, 0, 1, 0, 0, 0, 1] if object_pos[2] - z_table > 0.1: action = 20 * (goal_pos[:2] - object_pos[:2]) action = np.hstack((action, [0, 0, 0, 0, 1])) if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225: action = [0, 0, 0, 0, 0, 0, -1] # Drop nut once it's close enough to the peg action = np.clip(action, -1, 1) return action
def controller_action(self, obs: dict, take_action: bool = True, DEBUG: bool = False): observation = obs['observation'] goal_pos = obs['desired_goal'] achieved_goal = obs['achieved_goal'] gripper_pos = observation[:3] gripper_quat = observation[3:7] object_pos = observation[7:10] object_quat = observation[10:] z_table = 0.8610982 # the z-coordinate of the table surface object_axang = T.quat2axisangle(object_quat) if abs(object_axang[-1] - 1.24) < 0.2: object_axang_touse = [ 0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8) ] else: object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)] gripper_axang = T.quat2axisangle(gripper_quat) if self.gripper_reoriented == 0: self.gripper_init_quat = gripper_quat self.gripper_reoriented = 1 init_inv = T.quat_inverse(self.gripper_init_quat) changing_wf = T.quat_multiply(init_inv, gripper_quat) changing_wf_axang = T.quat2axisangle(changing_wf) # reorient the gripper to match the nut faces and move above the nut if not self.object_below_hand or self.gripper_reoriented < 5: self.nut_p = T.quat2axisangle(object_quat)[-1] if not self.object_below_hand: action = 20 * (object_pos[:2] - gripper_pos[:2]) else: action = [0, 0] action = 20 * (object_pos[:2] - gripper_pos[:2]) action_angle = 0.2 * (object_axang_touse - changing_wf_axang) action_angle = [0, 0, action_angle[-1]] if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1: if take_action: self.gripper_reoriented = 5 action = np.hstack((action, [0], action_angle, [-1])) if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01: if take_action: self.object_below_hand = True # close the gripper and pick the nut elif not self.object_in_hand: # Close gripper action = [0, 0, -1, 0, 0, 0, -1] if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01: action = [0, 0, 0, 0, 0, 0, 1] if take_action: self.object_in_hand = True else: # Move gripper up and toward goal position action = [0, 0, 1, 0, 0, 0, 1] if object_pos[2] - z_table > 0.1: action = 20 * (goal_pos[:2] - object_pos[:2]) action = np.hstack((action, [0, 0, 0, 0, 1])) if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225: action = [0, 0, 0, 0, 0, 0, -1] # Drop nut once it's close enough to the peg action = np.clip(action, -1, 1) return action
def __init__(self, *args, **kwargs): options = {} # Absolute pose control controller_name = 'OSC_POSE' options["controller_configs"] = suite.load_controller_config( default_controller=controller_name) self.cameraName = "frontview" skip_frame = 2 self.peg_env = suite.make( "TwoArmPegInHole", robots=["IIWA", "IIWA"], **options, has_renderer=False, ignore_done=True, use_camera_obs=True, use_object_obs=True, camera_names=self.cameraName, camera_heights=512, camera_widths=512, ) # Tolerances on position and rotation of peg relative to hole posTol = 0.005 rotTol = 0.05 observation = self.peg_env.reset() # observation["peg_to_hole"] is the vector FROM the hole TO the peg peg_pos0 = observation["hole_pos"] + observation["peg_to_hole"] self.peg_pos0 = peg_pos0 self.hole_pos0 = observation["hole_pos"] # Positions of robots 0 and 1 rel peg and hole, in peg and hole frames. Constant forever. pRob0RelPeg = np.matmul( T.quat2mat(T.quat_inverse(observation["peg_quat"])), observation["robot0_eef_pos"] - (peg_pos0)) pRob1RelHole = np.matmul( T.quat2mat(T.quat_inverse(observation["hole_quat"])), observation["robot1_eef_pos"] - observation["hole_pos"]) qRob0RelPeg = T.quat_multiply( T.quat_inverse(observation["robot0_eef_quat"]), observation["peg_quat"]) qRob1RelHole = T.quat_multiply( T.quat_inverse(observation["robot1_eef_quat"]), observation["hole_quat"]) # Store geometric constants model = self.peg_env.model.get_model() pegDim = model.geom_size[15] rPeg = pegDim[0] lPeg = pegDim[2] distSlack = 2 * lPeg # Set up optimization problem. # Define 3 keyframes: peg higher than hole, peg centered above hole with hole facing up, and peg in hole. # One constraint for each keyframe, and one constraint for unit quaternions nonlinear_constraint_1 = NonlinearConstraint(self.cons_1, distSlack, np.inf, jac='2-point', hess=BFGS()) nonlinear_constraint_2 = NonlinearConstraint( self.cons_2, np.array([-posTol, -posTol, distSlack, -rotTol]), np.array([posTol, posTol, np.inf, rotTol]), jac='2-point', hess=BFGS()) nonlinear_constraint_3 = NonlinearConstraint( self.cons_3, np.array([-posTol, -posTol, distSlack]), np.array([posTol, posTol, np.inf]), jac='2-point', hess=BFGS()) nonlinear_constraint_4 = NonlinearConstraint(self.cons_unit_quat, np.array([1, 1, 1, 1]), np.array([1, 1, 1, 1]), jac='2-point', hess=BFGS()) # Initial guess for optimizer x0 = np.tile( np.hstack((peg_pos0, observation["hole_pos"], observation["peg_quat"], observation["hole_quat"])), 3) # Cut out quat from last chunk x0 = x0[0:34] # Solve optimization problem res = minimize(self.traj_obj, x0, method='trust-constr', jac='2-point', hess=BFGS(), constraints=[ nonlinear_constraint_1, nonlinear_constraint_2, nonlinear_constraint_3, nonlinear_constraint_4 ], options={'verbose': 1}) # 'xtol': 1e-6, x = res.x # Extract optimization results # x = [p_peg_1, p_hole_1, q_peg_1, q_hole_1, p_peg_2, ... q_peg_3, q_hole_3] ind_offset_1 = 14 ind_offset_2 = 28 p_peg_1 = x[0:3] p_hole_1 = x[3:6] p_peg_2 = x[ind_offset_1 + 0:ind_offset_1 + 3] p_hole_2 = x[ind_offset_1 + 3:ind_offset_1 + 6] p_peg_3 = x[ind_offset_2 + 0:ind_offset_2 + 3] p_hole_3 = x[ind_offset_2 + 3:ind_offset_2 + 6] q_peg_1 = x[6:10] q_hole_1 = x[10:14] q_peg_2 = x[ind_offset_1 + 6:ind_offset_1 + 10] q_hole_2 = x[ind_offset_1 + 10:ind_offset_1 + 14] # Use the same orientations as in pose 2 q_peg_3 = q_peg_2 q_hole_3 = q_hole_2 # Robot rel world = peg rel world + robot rel peg # Robot rel peg in world frame = (q world frame rel peg frame) * (robot rel peg in peg frame) q_rob0_1 = T.quat_inverse( T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_1))) q_rob1_1 = T.quat_inverse( T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_1))) q_rob0_2 = T.quat_inverse( T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_2))) q_rob1_2 = T.quat_inverse( T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_2))) q_rob0_3 = T.quat_inverse( T.quat_multiply(qRob0RelPeg, T.quat_inverse(q_peg_3))) q_rob1_3 = T.quat_inverse( T.quat_multiply(qRob1RelHole, T.quat_inverse(q_hole_3))) self.p_rob0_1 = p_peg_1 + np.matmul(T.quat2mat(q_peg_1), pRob0RelPeg) self.p_rob1_1 = p_hole_1 + np.matmul(T.quat2mat(q_hole_1), pRob1RelHole) self.p_rob0_2 = p_peg_2 + np.matmul(T.quat2mat(q_peg_2), pRob0RelPeg) self.p_rob1_2 = p_hole_2 + np.matmul(T.quat2mat(q_hole_2), pRob1RelHole) self.p_rob0_3 = p_peg_3 + np.matmul(T.quat2mat(q_peg_3), pRob0RelPeg) self.p_rob1_3 = p_hole_3 + np.matmul(T.quat2mat(q_hole_3), pRob1RelHole) self.axang_rob0_1 = T.quat2axisangle(q_rob0_1) self.axang_rob1_1 = T.quat2axisangle(q_rob1_1) self.axang_rob0_2 = T.quat2axisangle(q_rob0_2) self.axang_rob1_2 = T.quat2axisangle(q_rob1_2) self.axang_rob0_3 = T.quat2axisangle(q_rob0_3) self.axang_rob1_3 = T.quat2axisangle(q_rob1_3) self.max_episode_steps = 200 # Gains for rotation and position error compensation rates self.kpp = 4 self.kpr = 0.1 # Initial pose Information self.rob0quat0 = observation["robot0_eef_quat"] self.rob1quat0 = observation["robot1_eef_quat"] self.rob0pos0 = observation["robot0_eef_pos"] self.rob1pos0 = observation["robot1_eef_pos"]
def _reset_internal(self): """ Resets simulation internal configurations. """ super()._reset_internal() # Reset all object positions using initializer sampler if we're not directly loading from an xml if not self.deterministic_reset: # Sample from the placement initializer for all objects object_placements = self.placement_initializer.sample() # Loop through all objects and reset their positions for obj_pos, _, obj in object_placements.values(): self.sim.data.set_joint_qpos( obj.joints[0], np.concatenate( [np.array(obj_pos), np.array([0.5, 0.5, -0.5, -0.5])])) self.sim.forward() # update sim states # says if probe has been in touch with torso self.has_touched_torso = False # initial position of end-effector self.ee_initial_pos = self._eef_xpos # create trajectory self.trajectory = self.get_trajectory() # initialize trajectory step self.initial_traj_step = np.random.default_rng().uniform( low=0, high=self.num_waypoints - 1) self.traj_step = self.initial_traj_step # step at which to evaluate trajectory. Must be in interval [0, num_waypoints - 1] # set first trajectory point self.traj_pt = self.trajectory.eval(self.traj_step) self.traj_pt_vel = self.trajectory.deriv(self.traj_step) # give controller access to robot (and its measurements) if self.robots[0].controller.name == "HMFC": self.robots[0].controller.set_robot(self.robots[0]) # initialize controller's trajectory self.robots[0].controller.traj_pos = self.traj_pt self.robots[0].controller.traj_ori = T.quat2axisangle(self.goal_quat) # get initial joint positions for robot init_qpos = self._get_initial_qpos() # override initial robot joint positions self.robots[0].set_robot_joint_positions(init_qpos) # update controller with new initial joints self.robots[0].controller.update_initial_joints(init_qpos) # initialize previously contact force measurement self.prev_z_contact_force = 0 # intialize derivative of contact force self.der_z_contact_force = 0 # initialize running mean of velocity self.vel_running_mean = np.linalg.norm(self.robots[0]._hand_vel) # initialize running mean of contact force self.z_contact_force_running_mean = self.sim.data.cfrc_ext[ self.probe_id][-1] # initialize data collection if self.save_data: # simulation data self.data_ee_pos = np.array(np.zeros((self.horizon, 3))) self.data_ee_goal_pos = np.array(np.zeros((self.horizon, 3))) self.data_ee_ori_diff = np.array(np.zeros(self.horizon)) self.data_ee_vel = np.array(np.zeros((self.horizon, 3))) self.data_ee_goal_vel = np.array(np.zeros(self.horizon)) self.data_ee_running_mean_vel = np.array(np.zeros(self.horizon)) self.data_ee_quat = np.array(np.zeros( (self.horizon, 4))) # (x,y,z,w) self.data_ee_goal_quat = np.array(np.zeros( (self.horizon, 4))) # (x,y,z,w) self.data_ee_diff_quat = np.array(np.zeros( self.horizon)) # (x,y,z,w) self.data_ee_z_contact_force = np.array(np.zeros(self.horizon)) self.data_ee_z_goal_contact_force = np.array(np.zeros( self.horizon)) self.data_ee_z_running_mean_contact_force = np.array( np.zeros(self.horizon)) self.data_ee_z_derivative_contact_force = np.array( np.zeros(self.horizon)) self.data_ee_z_goal_derivative_contact_force = np.array( np.zeros(self.horizon)) self.data_is_contact = np.array(np.zeros(self.horizon)) self.data_q_pos = np.array( np.zeros((self.horizon, self.robots[0].dof))) self.data_q_torques = np.array( np.zeros((self.horizon, self.robots[0].dof))) self.data_time = np.array(np.zeros(self.horizon)) # reward data self.data_pos_reward = np.array(np.zeros(self.horizon)) self.data_ori_reward = np.array(np.zeros(self.horizon)) self.data_vel_reward = np.array(np.zeros(self.horizon)) self.data_force_reward = np.array(np.zeros(self.horizon)) self.data_der_force_reward = np.array(np.zeros(self.horizon)) # policy/controller data self.data_action = np.array( np.zeros((self.horizon, self.robots[0].action_dim)))