def _goal_distance(self, goal_a, goal_b): assert goal_a.shape == goal_b.shape assert goal_a.shape[-1] == 7 d_pos = np.zeros_like(goal_a[..., 0]) d_rot = np.zeros_like(goal_b[..., 0]) if self.target_position != 'ignore': delta_pos = goal_a[..., :3] - goal_b[..., :3] d_pos = np.linalg.norm(delta_pos, axis=-1) if self.target_rotation != 'ignore': quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] if self.ignore_z_target_rotation: # Special case: We want to ignore the Z component of the rotation. # This code here assumes Euler angles with xyz convention. We first transform # to euler, then set the Z component to be equal between the two, and finally # transform back into quaternions. euler_a = rotations.quat2euler(quat_a) euler_b = rotations.quat2euler(quat_b) euler_a[2] = euler_b[2] quat_a = rotations.euler2quat(euler_a) # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff assert d_pos.shape == d_rot.shape return d_pos, d_rot
def _sample_goal(self): # Select a goal for the object position. target_pos = None if self.target_position == 'random': assert self.target_position_range.shape == (3, 2) offset = self.np_random.uniform(self.target_position_range[:, 0], self.target_position_range[:, 1]) assert offset.shape == (3, ) target_pos = self.sim.data.get_joint_qpos( 'object:joint')[:3] + offset elif self.target_position in ['ignore', 'fixed']: target_pos = self.sim.data.get_joint_qpos('object:joint')[:3] else: raise error.Error('Unknown target_position option "{}".'.format( self.target_position)) assert target_pos is not None assert target_pos.shape == (3, ) # Select a goal for the object rotation. target_quat = None if self.target_rotation == 'z': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) target_quat = quat_from_angle_and_axis(angle, axis) elif self.target_rotation == 'parallel': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) target_quat = quat_from_angle_and_axis(angle, axis) parallel_quat = self.parallel_quats[self.np_random.randint( len(self.parallel_quats))] target_quat = rotations.quat_mul(target_quat, parallel_quat) elif self.target_rotation == 'xyz': angle = self.np_random.uniform(-np.pi, np.pi) axis = self.np_random.uniform(-1., 1., size=3) target_quat = quat_from_angle_and_axis(angle, axis) elif self.target_rotation in ['ignore', 'fixed']: target_quat = self.sim.data.get_joint_qpos('object:joint') else: raise error.Error('Unknown target_rotation option "{}".'.format( self.target_rotation)) assert target_quat is not None assert target_quat.shape == (4, ) target_quat /= np.linalg.norm(target_quat) # normalized quaternion goal = np.concatenate([target_pos, target_quat]) return goal
def rollout(env, model, noise, config, normalizer=None, render=False, step=(1, 5), boundary_sample=False): trajectories = [] for i_agent in range(2): trajectories.append([]) # monitoring variables episode_reward = np.zeros(env.num_envs) frames = [] state_all = env.reset() state_all = back_to_dict(state_all, config) step_l, step_h = step for i_step in range(config['episode_length']): model.to_cpu() obs = [ K.tensor(obs, dtype=K.float32) for obs in state_all['observation'] ] goal = K.tensor(state_all['desired_goal'], dtype=K.float32) if i_step == 0: if (np.random.rand() < config['objtraj_p'] and not boundary_sample ) or (step_h >= config['episode_length']): objtraj_goal = goal original_goal = True else: original_goal = False achieved_goal = K.tensor(state_all['achieved_goal'], dtype=K.float32) objtraj_goal = [] goal_len_per_obj = goal.shape[1] // model.n_objects for i_object in range(model.n_objects): achieved_goal_per_obj = achieved_goal[:, i_object * goal_len_per_obj: (i_object + 1) * goal_len_per_obj] goal_per_obj = goal[:, i_object * goal_len_per_obj:(i_object + 1) * goal_len_per_obj] normed_achieved_goal_per_obj, normed_goal_per_goal, normed_step = normalizer[ 2].process( achieved_goal_per_obj, goal_per_obj, K.randint(low=step_l, high=step_h + 1, size=(achieved_goal_per_obj.shape[0], 1))) with K.no_grad(): objtraj_goal_per_obj = model.obj_traj( normed_achieved_goal_per_obj.to('cuda'), normed_goal_per_goal.to('cuda'), normed_step.to('cuda')) objtraj_goal.append(objtraj_goal_per_obj.cpu()) objtraj_goal = K.cat(objtraj_goal, dim=-1) # Observation normalization obs_goal = [] for i_agent in range(2): obs_goal.append(K.cat([obs[i_agent], objtraj_goal], dim=-1)) if normalizer[i_agent] is not None: #if not original_goal: # obs_goal[i_agent] = normalizer[i_agent].preprocess(obs_goal[i_agent]) #else: obs_goal[i_agent] = normalizer[i_agent].preprocess_with_update( obs_goal[i_agent]) action = model.select_action(obs_goal[0], noise).cpu().numpy() action_to_env = np.zeros((len(action), len(env.action_space.sample()))) action_to_env[:, 0:action.shape[1]] = action action_to_mem = action_to_env next_state_all, reward, done, info = env.step(action_to_env) next_state_all = back_to_dict(next_state_all, config) reward = K.tensor(reward, dtype=dtype).view(-1, 1) episode_reward += reward.squeeze(1).cpu().numpy() for i_agent in range(2): state = { 'observation': state_all['observation'][i_agent].copy(), 'achieved_goal': state_all['achieved_goal'].copy(), 'desired_goal': objtraj_goal.numpy().copy() } trajectories[i_agent].append((state.copy(), action_to_mem, reward)) goal_a = state_all['achieved_goal'] goal_b = objtraj_goal.numpy().copy() #state_all['desired_goal'] ENV_NAME = config['env_id'] if 'Rotate' in ENV_NAME: quat_a = goal_a[:, 3:] quat_b = goal_b[:, 3:] # Move to the next state state_all = next_state_all # Record frames if render: frames.append(env.render(mode='rgb_array')[0]) if 'Rotate' in ENV_NAME: # Subtract quaternions and extract angle between them. quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.)) d_rot = angle_diff distance = (d_rot < 0.1).astype(np.float32) else: distance = np.linalg.norm(goal_a - goal_b, axis=-1) distance = (distance < 0.05).astype(np.float32) obs, ags, goals, acts = [], [], [], [] for trajectory in trajectories: obs.append([]) ags.append([]) goals.append([]) acts.append([]) for i_step in range(config['episode_length']): obs[-1].append(trajectory[i_step][0]['observation']) ags[-1].append(trajectory[i_step][0]['achieved_goal']) goals[-1].append(trajectory[i_step][0]['desired_goal']) if (i_step < config['episode_length'] - 1): acts[-1].append(trajectory[i_step][1]) trajectories = { 'o': np.concatenate(obs, axis=-1).swapaxes(0, 1), 'ag': np.asarray(ags)[0, ].swapaxes(0, 1), 'g': np.asarray(goals)[0, ].swapaxes(0, 1), 'u': np.asarray(acts)[0, ].swapaxes(0, 1), } info = np.asarray([i_info['is_success'] for i_info in info]) return trajectories, episode_reward, info, distance
def _reset_sim(self): self.sim.set_state(self.initial_state) self.sim.forward() initial_qpos = self.sim.data.get_joint_qpos('object:joint').copy() initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:] assert initial_qpos.shape == (7, ) assert initial_pos.shape == (3, ) assert initial_quat.shape == (4, ) initial_qpos = None # Randomization initial rotation. if self.randomize_initial_rotation: if self.target_rotation == 'z': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) offset_quat = quat_from_angle_and_axis(angle, axis) initial_quat = rotations.quat_mul(initial_quat, offset_quat) elif self.target_rotation == 'parallel': angle = self.np_random.uniform(-np.pi, np.pi) axis = np.array([0., 0., 1.]) z_quat = quat_from_angle_and_axis(angle, axis) parallel_quat = self.parallel_quats[self.np_random.randint( len(self.parallel_quats))] offset_quat = rotations.quat_mul(z_quat, parallel_quat) initial_quat = rotations.quat_mul(initial_quat, offset_quat) elif self.target_rotation in ['xyz', 'ignore']: angle = self.np_random.uniform(-np.pi, np.pi) axis = self.np_random.uniform(-1., 1., size=3) offset_quat = quat_from_angle_and_axis(angle, axis) initial_quat = rotations.quat_mul(initial_quat, offset_quat) elif self.target_rotation == 'fixed': pass else: raise error.Error( 'Unknown target_rotation option "{}".'.format( self.target_rotation)) # Randomize initial position. if self.randomize_initial_position: if self.target_position != 'fixed': initial_pos += self.np_random.normal(size=3, scale=0.005) initial_quat /= np.linalg.norm(initial_quat) initial_qpos = np.concatenate([initial_pos, initial_quat]) self.sim.data.set_joint_qpos('object:joint', initial_qpos) def is_on_palm(): self.sim.forward() cube_middle_idx = self.sim.model.site_name2id('object:center') cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx] is_on_palm = (cube_middle_pos[2] > 0.04) return is_on_palm # Run the simulation for a bunch of timesteps to let everything settle in. for _ in range(10): self._set_action(np.zeros(20)) try: self.sim.step() except mujoco_py.MujocoException: return False return is_on_palm()