Example #1
def _init_env(goal_pose_json, difficulty):
    eval_config = {
        'action_space': difficulty2config[difficulty]['action_space'],
        'frameskip': 3,
        'reward_fn': 'competition_reward',
        'termination_fn': 'no_termination',
        'initializer': 'random_init',
        'monitor': False,
        'visualization': True,
        'sim': True,
        'rank': 0
    }

    from code.utils import set_seed
    set_seed(0)
    goal_pose_dict = json.loads(goal_pose_json)
    env = make_training_env(goal_pose_dict, difficulty, **eval_config)
    return env
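
Example #1 indexes `difficulty2config`, which is defined elsewhere in the source module. A minimal sketch of the shape the function relies on, plus a hypothetical call; the per-level values below are assumptions, echoing the action spaces used in the other examples:

import json

difficulty2config = {
    1: {'action_space': 'position'},
    2: {'action_space': 'position'},
    3: {'action_space': 'torque'},
    4: {'action_space': 'torque_and_position'},
}

# Hypothetical usage: an arbitrary goal pose serialized as JSON.
goal_pose_json = json.dumps({'position': [0.0, 0.0, 0.0325],
                             'orientation': [0.0, 0.0, 0.0, 1.0]})
env = _init_env(goal_pose_json, difficulty=3)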
Example #2
        goal_rot = Rotation.from_quat(self.obs['goal_object_orientation'])

        y_axis = [0, 1, 0]
        goal_direction_vector = goal_rot.apply(y_axis)
        actual_direction_vector = actual_rot.apply(y_axis)

        quat = get_rotation_between_vecs(actual_direction_vector,
                                         goal_direction_vector)
        return (Rotation.from_quat(quat) * actual_rot).as_quat()


if __name__ == '__main__':
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    import dl
    seed = 0
    dl.rng.seed(seed)

    env = make_training_env(move_cube.sample_goal(-1).to_dict(), 4,
                            reward_fn='competition_reward',
                            termination_fn='position_close_to_goal',
                            initializer='centered_init',
                            action_space='torque_and_position',
                            sim=True,
                            visualization=True,
                            episode_length=10000,
                            rank=seed)

    state_machine = TriFingerStateMachine(env)
    state_machine.run()
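
Example #2 relies on `get_rotation_between_vecs`, which the snippet does not show. A minimal sketch of such a helper, assuming SciPy rotations and the xyzw quaternion convention (an illustration, not necessarily the repo's actual implementation):

import numpy as np
from scipy.spatial.transform import Rotation


def get_rotation_between_vecs(v1, v2):
    """Return the quaternion (xyzw) rotating unit vector v1 onto v2."""
    v1 = np.asarray(v1, dtype=float) / np.linalg.norm(v1)
    v2 = np.asarray(v2, dtype=float) / np.linalg.norm(v2)
    axis = np.cross(v1, v2)
    sin_angle = np.linalg.norm(axis)
    if sin_angle < 1e-8:
        if np.dot(v1, v2) > 0.0:
            # Vectors already aligned: identity rotation.
            return np.array([0.0, 0.0, 0.0, 1.0])
        # Antiparallel: rotate by pi around any axis orthogonal to v1.
        axis = np.cross(v1, [1.0, 0.0, 0.0])
        if np.linalg.norm(axis) < 1e-8:
            axis = np.cross(v1, [0.0, 1.0, 0.0])
        return Rotation.from_rotvec(np.pi * axis / np.linalg.norm(axis)).as_quat()
    angle = np.arctan2(sin_angle, np.dot(v1, v2))
    return Rotation.from_rotvec(angle * axis / sin_angle).as_quat()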
Example #3
        path = Path(cube_path, joint_conf_path, tip_path, grasp)
        path.set_min_height(self.env, path_min_height)
        return path


if __name__ == '__main__':
    from trifinger_simulation.tasks import move_cube
    from code.make_env import make_training_env

    reward_fn = 'competition_reward'
    termination_fn = 'position_close_to_goal'
    initializer = 'small_rot_init'
    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            4,
                            reward_fn=reward_fn,
                            termination_fn=termination_fn,
                            initializer=initializer,
                            action_space='position',
                            sim=True,
                            visualization=True)

    for i in range(1):
        obs = env.reset()

        pos = obs["object_position"]
        quat = obs["object_orientation"]
        goal_pos = obs["goal_object_position"]
        goal_quat = obs["goal_object_orientation"]
        planner = WholeBodyPlanner(env)
        path = planner.plan(pos,
                            quat,
                            goal_pos,
                            goal_quat)  # final argument assumed; the call is truncated in the original snippet
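        # Hypothetical continuation (the snippet ends here): if the planned
        # `joint_conf_path` is a sequence of joint configurations, it can be
        # replayed through the 'position' action space. The attribute name is
        # assumed from the Path(...) constructor shown above.
        for q in path.joint_conf_path:
            obs, reward, done, info = env.step(q)
            if done:
                break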
Example #4
            pregrasp_tip_pos.append(tip_pos)
            pregrasp_jconfs.append(qs[0])
    return pregrasp_jconfs[-1], pregrasp_tip_pos[-1]


if __name__ == '__main__':
    import pybullet as p
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    from code.grasping.grasp_functions import get_heuristic_grasp

    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            3,
                            reward_fn='competition_reward',
                            termination_fn='position_close_to_goal',
                            initializer='training_init',
                            action_space='torque',
                            sim=True,
                            visualization=True,
                            rank=1)

    obs = env.reset()
    p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                 cameraYaw=0,
                                 cameraPitch=-40,
                                 cameraTargetPosition=[0, 0, 0])

    grasp = get_heuristic_grasp(env, obs['object_position'],
                                obs['object_orientation'])
    obs, done = execute_grasp_approach(env, obs, grasp)
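    # A quick sanity check after the approach, using the observation keys that
    # appear in Example #3 (assumption: the values are array-like):
    import numpy as np
    dist = np.linalg.norm(np.asarray(obs['object_position'])
                          - np.asarray(obs['goal_object_position']))
    print('object-to-goal distance after approach: {:.3f} m'.format(dist))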
Example #5
if __name__ == '__main__':
    from code.make_env import make_training_env
    from trifinger_simulation.tasks import move_cube
    from code.grasping import get_planned_grasp, execute_grasp_approach
    from code.utils import set_seed
    from .fc import ForceControlPolicy
    import pybullet as p  # p is used below but was not imported in the original snippet

    set_seed(0)
    viz = True
    difficulty = 4
    is_level_4 = difficulty == 4
    env = make_training_env(move_cube.sample_goal(-1).to_dict(),
                            difficulty,
                            reward_fn='competition_reward',
                            termination_fn='no_termination',
                            initializer='training_init',
                            action_space='torque_and_position',
                            sim=True,
                            visualization=viz,
                            rank=0)
    env.reset()

    for _ in range(5):
        obs = env.reset()
        p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
        p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                     cameraYaw=0,
                                     cameraPitch=-40,
                                     cameraTargetPosition=[0, 0, 0])

        grasp, path = get_planned_grasp(env, obs['object_position'],
                                        obs['object_orientation'])
        # NOTE: get_planned_grasp's arguments are assumed; the call is
        # truncated in the original snippet.
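        # A plausible continuation, mirroring Example #4's use of the grasp:
        obs, done = execute_grasp_approach(env, obs, grasp)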
Example #6
def main(args):
    goal = move_cube.sample_goal(args.difficulty)
    goal_dict = {'position': goal.position, 'orientation': goal.orientation}
    eval_config = {
        'cube_goal_pose': goal_dict,
        'goal_difficulty': args.difficulty,
        'action_space':
        'torque' if args.policy == 'fc' else 'torque_and_position',
        'frameskip': 3,
        'reward_fn': 'competition_reward',
        'termination_fn': 'no_termination',
        'initializer': 'training_init',
        'sim': True,
        'monitor': False,
        'rank': args.seed,
        'training': True
    }
    env = make_training_env(visualization=True, **eval_config)

    acc_rewards = []
    wallclock_times = []
    aligning_steps = []
    env_steps = []
    avg_rewards = []
    for i in range(args.num_episodes):
        start = time.time()
        is_done = False
        observation = env.reset()
        accumulated_reward = 0
        aligning_steps.append(env.unwrapped.step_count)

        # Clear some GUI panels.
        p.configureDebugVisualizer(p.COV_ENABLE_GUI, 0)
        # Change camera parameters; the camera can also be rotated with CTRL + drag.
        p.resetDebugVisualizerCamera(cameraDistance=0.6,
                                     cameraYaw=0,
                                     cameraPitch=-40,
                                     cameraTargetPosition=[0, 0, 0])

        step = 0
        while not is_done and step < 2000:
            step += 1
            action = env.action_space.sample()
            if isinstance(action, dict):
                action['torque'] *= 0
                action['position'] *= 0
            else:
                action *= 0
            observation, reward, is_done, info = env.step(action)
            accumulated_reward += reward
        acc_rewards.append(accumulated_reward)
        env_steps.append(env.unwrapped.step_count - aligning_steps[-1])
        avg_rewards.append(accumulated_reward / env_steps[-1])
        print("Episode {}\tAccumulated reward: {}".format(
            i, accumulated_reward))
        print("Episode {}\tAlinging steps: {}".format(i, aligning_steps[-1]))
        print("Episode {}\tEnv steps: {}".format(i, env_steps[-1]))
        print("Episode {}\tAvg reward: {}".format(i, avg_rewards[-1]))
        end = time.time()
        print('Elapsed:', end - start)
        wallclock_times.append(end - start)

    env.close()

    def _print_stats(name, data):
        print('======================================')
        print(f'Mean   {name}\t{np.mean(data):.2f}')
        print(f'Max    {name}\t{max(data):.2f}')
        print(f'Min    {name}\t{min(data):.2f}')
        print(f'Median {name}\t{statistics.median(data):.2f}')

    print('Total elapsed time\t{:.2f}'.format(sum(wallclock_times)))
    print('Mean elapsed time\t{:.2f}'.format(
        sum(wallclock_times) / len(wallclock_times)))
    _print_stats('acc reward', acc_rewards)
    _print_stats('aligning steps', aligning_steps)
    _print_stats('step reward', avg_rewards)
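
# The snippet defines main(args) but omits the entry point. A minimal sketch
# consistent with the attributes main() reads; the flag names are assumptions.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--difficulty', type=int, default=4)
    parser.add_argument('--policy', default='fc')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_episodes', type=int, default=5)
    main(parser.parse_args())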