def generate_test_set(levels: typing.List[int], samples_per_level: int, logfile_tmpl: str) -> typing.List[TestSample]: """Generate random test set for policy evaluation. For each difficulty level a list of samples, consisting of randomized initial pose and goal pose, is generated. Args: levels: List of difficulty levels. samples_per_level: How many samples are generated per level. logfile_tmpl: Format string for the log file associated with this sample. Needs to contain placeholders "{level}" and "{iteration}". Returns: List of ``len(levels) * samples_per_level`` test samples. """ samples = [] for level in levels: for i in range(samples_per_level): init = move_cube.sample_goal(-1) goal = move_cube.sample_goal(level) logfile = logfile_tmpl.format(level=level, iteration=i) samples.append( TestSample(level, i, init.to_json(), goal.to_json(), logfile)) return samples
def get_initial_state(self): """Get a random initial object pose (always on the ground).""" init = move_cube.sample_goal(difficulty=-1) init.position[:2] = sample_uniform_from_circle(0.07) if self.difficulty == 4: self.goal = move_cube.sample_goal(self.difficulty) # sample orientation projected_goal_ori = project_cube_xy_plane(self.goal.orientation) z_rot_noise = Rotation.from_euler('z', (np.pi / 2 * 0.70) * random.random()) init.orientation = ( z_rot_noise * Rotation.from_quat(projected_goal_ori)).as_quat() return init
def _reset_direct_simulation(self): """Reset direct simulation. With this the env can be used without backend. """ # set this to false to disable pyBullet's simulation visualization = True # reset simulation del self.platform # initialize simulation initial_object_pose = move_cube.sample_goal(difficulty=-1) self.platform = trifinger_simulation.TriFingerPlatform( visualization=visualization, initial_object_pose=initial_object_pose, ) # visualize the goal if visualization: self.goal_marker = trifinger_simulation.visual_objects.CubeMarker( width=0.065, position=self.goal["position"], orientation=self.goal["orientation"], physicsClientId=self.platform.simfinger._pybullet_client_id, )
def compute_reward(logdir, simulation): with open(os.path.join(logdir, "goal.json"), 'r') as f: goal = json.load(f) difficulty = goal['difficulty'] goal_pose = move_cube.Pose(position=np.array(goal['goal']['position']), orientation=np.array( goal['goal']['orientation'])) if (simulation == 0): log = robot_fingers.TriFingerPlatformLog( os.path.join(logdir, "robot_data.dat"), os.path.join(logdir, "camera_data.dat")) reward = 0.0 for t in range(log.get_first_timeindex(), log.get_last_timeindex() + 1): camera_observation = log.get_camera_observation(t) reward -= evaluate_state(goal_pose, camera_observation.filtered_object_pose, difficulty) return reward, True else: path = os.path.join(logdir, 'observations.pkl') with open(path, 'rb') as handle: observations = pkl.load(handle) reward = 0.0 ex_state = move_cube.sample_goal(difficulty=-1) for i in range(len(observations)): ex_state.position = observations[i]["achieved_goal"]["position"] ex_state.orientation = observations[i]["achieved_goal"][ "orientation"] reward -= evaluate_state(goal_pose, ex_state, difficulty) return reward, True
def _reset_direct_simulation(self): """Reset direct simulation. With this the env can be used without backend. """ # reset simulation del self.platform # initialize simulation if self.initializer is None: initial_object_pose = move_cube.sample_goal(difficulty=-1) else: initial_object_pose = self.initializer.get_initial_state() self.goal = self.initializer.get_goal().to_dict() self.platform = trifinger_simulation.TriFingerPlatform( visualization=self.visualization, initial_object_pose=initial_object_pose, ) # use mass of real cube p.changeDynamics( bodyUniqueId=self.platform.cube.block, linkIndex=-1, physicsClientId=self.platform.simfinger._pybullet_client_id, mass=CUBOID_MASS) # visualize the goal if self.visualization: self.goal_marker = trifinger_simulation.visual_objects.CuboidMarker( size=CUBOID_SIZE, position=self.goal["position"], orientation=self.goal["orientation"], pybullet_client_id=self.platform.simfinger._pybullet_client_id, )
def _reset_direct_simulation(self): """Reset direct simulation. With this the env can be used without backend. """ # reset simulation del self.platform # initialize simulation if self.initializer is None: initial_object_pose = move_cube.sample_goal(difficulty=-1) else: initial_object_pose = self.initializer.get_initial_state() self.goal = self.initializer.get_goal() self.platform = trifinger_simulation.TriFingerPlatform( visualization=self.visualization, initial_object_pose=initial_object_pose, ) # visualize the goal if self.visualization: self.goal_marker = trifinger_simulation.visual_objects.CubeMarker( width=0.065, position=self.goal["position"], orientation=self.goal["orientation"], physicsClientId=self.platform.simfinger._pybullet_client_id, )
def get_initial_state(self): """Get the initial position based on the information from the file. Do not read z coordinate to ensure that this position is on the ground plane""" ex_state = move_cube.sample_goal(difficulty=-1) ex_state.position = self.init_information[ 0:2] # read only two values, z-coordinate is unchanged ex_state.orientation = self.init_information[3:7] return ex_state
def reset(self): # reset simulation del self.platform # initialize simulation if self.initializer is None: # if no initializer is given (which will be the case during training), # we can initialize in any way desired. here, we initialize the cube always # in the center of the arena, instead of randomly, as this appears to help # training initial_robot_position = ( TriFingerPlatform.spaces.robot_position.default ) default_object_position = ( TriFingerPlatform.spaces.object_position.default ) default_object_orientation = ( TriFingerPlatform.spaces.object_orientation.default ) initial_object_pose = move_cube.Pose( position=default_object_position, orientation=default_object_orientation, ) goal_object_pose = move_cube.sample_goal(difficulty=1) else: # if an initializer is given, i.e. during evaluation, we need to initialize # according to it, to make sure we remain coherent with the standard CubeEnv. # otherwise the trajectories produced during evaluation will be invalid. initial_robot_position = ( TriFingerPlatform.spaces.robot_position.default ) initial_object_pose = self.initializer.get_initial_state() goal_object_pose = self.initializer.get_goal() self.platform = TriFingerPlatform( visualization=self.visualization, initial_robot_position=initial_robot_position, initial_object_pose=initial_object_pose, ) self.goal = { "position": goal_object_pose.position, "orientation": goal_object_pose.orientation, } # visualize the goal if self.visualization: self.goal_marker = visual_objects.CubeMarker( width=0.065, position=goal_object_pose.position, orientation=goal_object_pose.orientation, pybullet_client_id=self.platform.simfinger._pybullet_client_id, ) self.info = dict() self.step_count = 0 return self._create_observation(0)
def get_goal(self): """Get a random goal depending on the difficulty.""" ori_error = 100000 # some large value while ori_error < np.pi/2 * 0.1 or \ ori_error > self.orientation_error_threshold: goal = move_cube.sample_goal(difficulty=4) # goal.position[:2] = self.init.position[:2] # TEMP: align x and y ori_error = self._weighted_orientation_error(goal) # pos_error = self._weighted_position_error(goal) return goal
def compute_reward_adaptive_behind(logdir, simulation, TOTALTIMESTEPS): with open(os.path.join(logdir, "goal.json"), 'r') as f: goal = json.load(f) difficulty = goal['difficulty'] goal_pose = move_cube.Pose(position=np.array(goal['goal']['position']), orientation=np.array( goal['goal']['orientation'])) indice = goal['reachstart'] if (indice == -1): return -10000, False if (simulation == 0): min_length = TOTALTIMESTEPS log = robot_fingers.TriFingerPlatformLog( os.path.join(logdir, "robot_data.dat"), os.path.join(logdir, "camera_data.dat")) reward = 0.0 count = 0 for t in range(log.get_last_timeindex() - TOTALTIMESTEPS, log.get_last_timeindex() + 1): count += 1 camera_observation = log.get_camera_observation(t) reward -= evaluate_state(goal_pose, camera_observation.filtered_object_pose, difficulty) if (count == min_length): break if (count == min_length): return reward, True else: return -10000, False else: min_length = TOTALTIMESTEPS path = os.path.join(logdir, 'observations.pkl') with open(path, 'rb') as handle: observations = pkl.load(handle) reward = 0.0 count = 0 ex_state = move_cube.sample_goal(difficulty=-1) for i in range( len(observations) - TOTALTIMESTEPS - 1, len(observations)): count += 1 ex_state.position = observations[i]["achieved_goal"]["position"] ex_state.orientation = observations[i]["achieved_goal"][ "orientation"] reward -= evaluate_state(goal_pose, ex_state, difficulty) if (count == min_length): break if (count == min_length): return reward, True else: return -10000, False
def get_goal(self): """Get a random goal depending on the difficulty.""" if self.difficulty == 4: if self.goal is None: raise ValueError( "Goal is unset. Call get_initial_state before " "get_goal.") else: goal = self.goal self.goal = None else: goal = move_cube.sample_goal(difficulty=self.difficulty) return goal
def test_sample_goal_difficulty_4_no_initial_pose(): for i in range(1000): goal = move_cube.sample_goal(difficulty=4) # verify the goal is valid (i.e. within the allowed ranges) try: move_cube.validate_goal(goal) except move_cube.InvalidGoalError as e: pytest.fail(msg="Invalid goal: {} pose is {}, {}".format( e, e.position, e.orientation), ) # verify the goal satisfies conditions of difficulty 2 assert goal.position[2] <= move_cube._max_height assert goal.position[2] >= move_cube._min_height
def sample_goal() -> Trajectory: """Sample a goal trajectory with random steps. The number of goals in the trajectory is depending on the episode length (see :data:`EPISODE_LENGTH`). The first goal has a duration of :data:`FIRST_GOAL_DURATION` steps, the following ones a duration of :data:`GOAL_DURATION`. Returns: Trajectory as a list of tuples ``(t, position)`` where ``t`` marks the time step from which the goal is active. """ trajectory = [] first_goal = move_cube.sample_goal(GOAL_DIFFICULTY) trajectory.append((0, first_goal.position)) t = FIRST_GOAL_DURATION while t < EPISODE_LENGTH: goal = move_cube.sample_goal(GOAL_DIFFICULTY) trajectory.append((t, goal.position)) t += GOAL_DURATION return trajectory
def main(): difficulty = int(sys.argv[1]) if len(sys.argv) == 3: goal_pose_json = sys.argv[2] else: goal_pose = move_cube.sample_goal(difficulty) goal_pose_json = json.dumps({ 'position': goal_pose.position.tolist(), 'orientation': goal_pose.orientation.tolist() }) env = _init_env(goal_pose_json, difficulty) state_machine = TriFingerStateMachine(env) state_machine.run()
def test_sample_goal_difficulty_1_no_initial_pose(): for i in range(1000): goal = move_cube.sample_goal(difficulty=1) # verify the goal is valid (i.e. within the allowed ranges) try: move_cube.validate_goal(goal) except move_cube.InvalidGoalError as e: pytest.fail(msg="Invalid goal: {} pose is {}, {}".format( e, e.position, e.orientation), ) # verify the goal satisfies conditions of difficulty 1 # always on ground assert goal.position[2] == (move_cube._CUBE_WIDTH / 2) # no orientation np.testing.assert_array_equal(goal.orientation, [0, 0, 0, 1])
def test_sample_goal_difficulty_4(self): for i in range(1000): goal = move_cube.sample_goal(difficulty=4) # verify the goal is valid (i.e. within the allowed ranges) try: move_cube.validate_goal(goal) except move_cube.InvalidGoalError as e: self.fail( msg="Invalid goal: {} pose is {}, {}".format( e, e.position, e.orientation ), ) # verify the goal satisfies conditions of difficulty 2 self.assertLessEqual(goal.position[2], move_cube._max_height) self.assertGreaterEqual(goal.position[2], move_cube._min_height)
def __init__(self, difficulty): """Initialize. Args: difficulty (int): Difficulty level for sampling goals. """ from code.align_rotation import project_cube_xy_plane self.difficulty = difficulty self._goal = move_cube.sample_goal(difficulty=self.difficulty) self._goal.position[:2] = sample_uniform_from_circle(0.05) # centered self._goal.position[2] = min(self._goal.position[2], 0.2) projected_ori = project_cube_xy_plane(self._goal.orientation) projected_pos = self._goal.position * np.array([1, 1, 0]) + np.array( [0, 0, 0.015]) self._init = move_cube.Pose.from_dict({ 'position': projected_pos, 'orientation': projected_ori })
def set_goal(self, pos=None, orientation=None, log_timestep=True): ex_state = move_cube.sample_goal( difficulty=-1) # ensures that on the ground if not (pos is None): self.goal["position"] = ex_state.position self.goal["position"][:3] = pos[:3] if not (orientation is None): self.goal["orientation"] = ex_state.orientation self.goal["orientation"] = orientation if (self.visualization): self.cube_viz.goal_viz = None if (log_timestep): if (self.simulation): self.change_goal_last = self.platform.get_current_timeindex() else: self.change_goal_last = self.real_platform.get_current_timeindex( )
def test_sample_goal_difficulty_1(self): for i in range(1000): goal = move_cube.sample_goal(difficulty=1) # verify the goal is valid (i.e. within the allowed ranges) try: move_cube.validate_goal(goal) except move_cube.InvalidGoalError as e: self.fail( msg="Invalid goal: {} pose is {}, {}".format( e, e.position, e.orientation ), ) # verify the goal satisfies conditions of difficulty 1 # always on ground self.assertEqual(goal.position[2], move_cube._CUBOID_HALF_SIZE[2]) # no orientation np.testing.assert_array_equal(goal.orientation, [0, 0, 0, 1])
def main(): parser = argparse.ArgumentParser('args') parser.add_argument('difficulty', type=int) parser.add_argument( 'method', type=str, help="The method to run. One of 'mp-pg', 'cic-cg', 'cpc-tg'") parser.add_argument( '--residual', default=False, action='store_true', help= "add to use residual policies. Only compatible with difficulties 3 and 4." ) parser.add_argument('--bo', default=False, action='store_true', help="add to use BO optimized parameters.") args = parser.parse_args() goal_pose = move_cube.sample_goal(args.difficulty) goal_pose_dict = { 'position': goal_pose.position.tolist(), 'orientation': goal_pose.orientation.tolist() } env = _init_env(goal_pose_dict, args.difficulty) state_machine = create_state_machine(args.difficulty, args.method, env, args.residual, args.bo) ##################### # Run state machine ##################### obs = env.reset() state_machine.reset() done = False while not done: action = state_machine(obs) obs, _, done, _ = env.step(action)
goal_rot = Rotation.from_quat(self.obs['goal_object_orientation']) y_axis = [0, 1, 0] goal_direction_vector = goal_rot.apply(y_axis) actual_direction_vector = actual_rot.apply(y_axis) quat = get_rotation_between_vecs(actual_direction_vector, goal_direction_vector) return (Rotation.from_quat(quat) * actual_rot).as_quat() if __name__ == '__main__': from code.make_env import make_training_env from trifinger_simulation.tasks import move_cube import dl seed = 0 dl.rng.seed(seed) env = make_training_env(move_cube.sample_goal(-1).to_dict(), 4, reward_fn='competition_reward', termination_fn='position_close_to_goal', initializer='centered_init', action_space='torque_and_position', sim=True, visualization=True, episode_length=10000, rank=seed) state_machine = TriFingerStateMachine(env) state_machine.run()
def main(args): goal = move_cube.sample_goal(args.difficulty) goal_dict = {'position': goal.position, 'orientation': goal.orientation} eval_config = { 'cube_goal_pose': goal_dict, 'goal_difficulty': args.difficulty, 'action_space': 'torque' if args.policy == 'fc' else 'torque_and_position', 'frameskip': 3, 'reward_fn': 'competition_reward', 'termination_fn': 'no_termination', 'initializer': 'training_init', 'sim': True, 'monitor': False, 'rank': args.seed, 'training': True } env = make_training_env(visualization=True, **eval_config) acc_rewards = [] wallclock_times = [] aligning_steps = [] env_steps = [] avg_rewards = [] for i in range(args.num_episodes): start = time.time() is_done = False observation = env.reset() accumulated_reward = 0 aligning_steps.append(env.unwrapped.step_count) #clear some windows in GUI p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0) #change camera parameters # You can also rotate the camera by CTRL + drag p.resetDebugVisualizerCamera(cameraDistance=0.6, cameraYaw=0, cameraPitch=-40, cameraTargetPosition=[0, 0, 0]) step = 0 while not is_done and step < 2000: step += 1 action = env.action_space.sample() if isinstance(action, dict): action['torque'] *= 0 action['position'] *= 0 else: action *= 0 observation, reward, is_done, info = env.step(action) accumulated_reward += reward acc_rewards.append(accumulated_reward) env_steps.append(env.unwrapped.step_count - aligning_steps[-1]) avg_rewards.append(accumulated_reward / env_steps[-1]) print("Episode {}\tAccumulated reward: {}".format( i, accumulated_reward)) print("Episode {}\tAlinging steps: {}".format(i, aligning_steps[-1])) print("Episode {}\tEnv steps: {}".format(i, env_steps[-1])) print("Episode {}\tAvg reward: {}".format(i, avg_rewards[-1])) end = time.time() print('Elapsed:', end - start) wallclock_times.append(end - start) env.close() def _print_stats(name, data): print('======================================') print(f'Mean {name}\t{np.mean(data):.2f}') print(f'Max {name}\t{max(data):.2f}') print(f'Min {name}\t{min(data):.2f}') print(f'Median {name}\t{statistics.median(data):2f}') print('Total elapsed time\t{:.2f}'.format(sum(wallclock_times))) print('Mean elapsed time\t{:.2f}'.format( sum(wallclock_times) / len(wallclock_times))) _print_stats('acc reward', acc_rewards) _print_stats('aligning steps', aligning_steps) _print_stats('step reward', avg_rewards)
if one_pose: cube_path = [cube_path] tip_path = _get_tip_path(grasp.cube_tip_pos, cube_path) path = Path(cube_path, joint_conf_path, tip_path, grasp) path.set_min_height(self.env, path_min_height) return path if __name__ == '__main__': from trifinger_simulation.tasks import move_cube from env.make_env import make_env reward_fn = 'competition_reward' termination_fn = 'position_close_to_goal' initializer = 'small_rot_init' env = make_env(move_cube.sample_goal(-1).to_dict(), 4, reward_fn=reward_fn, termination_fn=termination_fn, initializer=initializer, action_space='position', sim=True, visualization=True) for i in range(1): obs = env.reset() pos = obs["object_position"] quat = obs["object_orientation"] goal_pos = obs["goal_object_position"] goal_quat = obs["goal_object_orientation"]
def make_training_env(nenv, state_machine, goal_difficulty, action_space, residual_state, frameskip=1, sim=True, visualization=False, reward_fn=None, termination_fn=None, initializer=None, episode_length=100000, monitor=False, seed=0, domain_randomization=False, norm_observations=False, max_torque=0.1): # dummy goal dict goal = move_cube.sample_goal(goal_difficulty) goal_dict = {'position': goal.position, 'orientation': goal.orientation} def _env(rank): def _thunk(): env = make_env( cube_goal_pose=goal_dict, goal_difficulty=goal_difficulty, action_space=action_space, frameskip=frameskip, sim=sim, visualization=visualization, reward_fn=reward_fn, termination_fn=termination_fn, initializer=initializer, episode_length=10 * episode_length, # make this long enough to ensure that we have "episode_length" steps in the residual_state. rank=rank, monitor=monitor, ) if domain_randomization: env = RandomizedEnvWrapper(env) env = ResidualWrapper(env, state_machine, frameskip, max_torque, residual_state, max_length=episode_length) env = EpisodeInfo(env) env.seed(seed + rank) return env return _thunk if nenv > 1: env = SubprocVecEnv([_env(i) for i in range(nenv)], context='fork') else: env = DummyVecEnv([_env(0)]) env.reward_range = env.envs[0].reward_range if norm_observations: env = VecObsNormWrapper(env) norm_params = torch.load( os.path.join(os.path.dirname(__file__), 'obs_norm.pt')) env.load_state_dict(norm_params) assert env.mean is not None assert env.std is not None return env
def get_initial_state(self): """Get a random initial object pose (always on the ground).""" init = move_cube.sample_goal(difficulty=-1) init.position[:2] = sample_uniform_from_circle(0.10) return init
def get_goal(self): """Get the goal position based on the information from the file.""" ex_state = move_cube.sample_goal(difficulty=self.difficulty) ex_state.position = self.init_information[7:10] ex_state.orientation = self.init_information[10:14] return ex_state
def get_goal(self): """Get a random goal depending on the difficulty.""" return move_cube.sample_goal(difficulty=self.difficulty)
def get_initial_state(self): """Get a random initial object pose (always on the ground).""" return move_cube.sample_goal(difficulty=-1)