コード例 #1
0
def replay_on_holdout(args, sim_env):
    holdoutfile = h5py.File(args.holdoutfile, 'r')
    tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end)
    loadresultfile = h5py.File(args.loadresultfile, 'r')
    loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks)

    num_successes = 0
    num_total = 0
    
    for i_task, demo_id_rope_nodes in loadresult_items:
        print "task %s" % i_task
        sim_util.reset_arms_to_side(sim_env)
        redprint("Replace rope")
        rope_nodes, rope_params, _, _ = eval_util.load_task_results_init(args.loadresultfile, i_task)
        # uncomment if the results file don't have the right rope nodes
        #rope_nodes = demo_id_rope_nodes["rope_nodes"][:]
        if args.replay_rope_params:
            rope_params = args.replay_rope_params
        # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results!
        time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env)

        if args.animation:
            sim_env.viewer.Step()

        eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, rope_params)

        for i_step in range(len(loadresultfile[i_task]) - (1 if 'init' in loadresultfile[i_task] else 0)):
            print "task %s step %i" % (i_task, i_step)
            sim_util.reset_arms_to_side(sim_env)

            redprint("Observe point cloud")
            new_xyz = sim_env.sim.observe_cloud()
    
            eval_stats = eval_util.EvalStats()

            best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step(args.loadresultfile, sim_env, i_task, i_step)
            
            time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env)
            time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(rope_params))
            eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj(sim_env, new_xyz, GlobalVars.actions[best_action], full_trajs, animate=args.animation, interactive=args.interactive)

            print "BEST ACTION:", best_action

            replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env)
            if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm(rots - replay_rots) > 0:
                yellowprint("The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f" % (np.linalg.norm(trans - replay_trans), np.linalg.norm(rots - replay_rots)))
            else:
                yellowprint("Reproducible results OK")
            
            eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_action, full_trajs, q_values)
            
            if is_knot(sim_env.sim.rope.GetControlPoints()):
                break;

        if is_knot(sim_env.sim.rope.GetControlPoints()):
            num_successes += 1
        num_total += 1

        redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' + str(num_total))
コード例 #2
0
def replay_on_holdout(args, sim_env):
    holdoutfile = h5py.File(args.holdoutfile, 'r')
    tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile,
                                          args.i_start, args.i_end)
    loadresultfile = h5py.File(args.loadresultfile, 'r')
    loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks)

    num_successes = 0
    num_total = 0

    for i_task, demo_id_rope_nodes in loadresult_items:
        print "task %s" % i_task
        sim_util.reset_arms_to_side(sim_env)
        redprint("Replace rope")
        rope_nodes, rope_params, _, _ = eval_util.load_task_results_init(
            args.loadresultfile, i_task)
        # uncomment if the results file don't have the right rope nodes
        #rope_nodes = demo_id_rope_nodes["rope_nodes"][:]
        if args.replay_rope_params:
            rope_params = args.replay_rope_params
        # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results!
        time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env)

        if args.animation:
            sim_env.viewer.Step()

        eval_util.save_task_results_init(args.resultfile, sim_env, i_task,
                                         rope_nodes, rope_params)

        for i_step in range(
                len(loadresultfile[i_task]) -
            (1 if 'init' in loadresultfile[i_task] else 0)):
            print "task %s step %i" % (i_task, i_step)
            sim_util.reset_arms_to_side(sim_env)

            redprint("Observe point cloud")
            new_xyz = sim_env.sim.observe_cloud()

            eval_stats = eval_util.EvalStats()

            best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step(
                args.loadresultfile, sim_env, i_task, i_step)

            time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env)
            time_machine.restore_from_checkpoint(
                'depth_0_%i' % i_step, sim_env,
                sim_util.get_rope_params(rope_params))
            eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj(
                sim_env,
                new_xyz,
                GlobalVars.actions[best_action],
                full_trajs,
                animate=args.animation,
                interactive=args.interactive)

            print "BEST ACTION:", best_action

            replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env)
            if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm(
                    rots - replay_rots) > 0:
                yellowprint(
                    "The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f"
                    % (np.linalg.norm(trans - replay_trans),
                       np.linalg.norm(rots - replay_rots)))
            else:
                yellowprint("Reproducible results OK")

            eval_util.save_task_results_step(args.resultfile, sim_env, i_task,
                                             i_step, eval_stats, best_action,
                                             full_trajs, q_values)

            if is_knot(sim_env.sim.rope.GetControlPoints()):
                break

        if is_knot(sim_env.sim.rope.GetControlPoints()):
            num_successes += 1
        num_total += 1

        redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' +
                 str(num_total))
コード例 #3
0
def eval_on_holdout(args, sim_env):
    feature_fn, _, num_features, actions = select_feature_fn(args)

    weightfile = h5py.File(args.weightfile, 'r')
    weights = weightfile['weights'][:]
    w0 = weightfile['w0'][()] if 'w0' in weightfile else 0
    weightfile.close()
    assert weights.shape[
        0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used"

    holdoutfile = h5py.File(args.holdoutfile, 'r')
    tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile,
                                          args.i_start, args.i_end)
    holdout_items = eval_util.get_holdout_items(holdoutfile, tasks)

    num_successes = 0
    num_total = 0

    for i_task, demo_id_rope_nodes in holdout_items:
        print "task %s" % i_task
        sim_util.reset_arms_to_side(sim_env)
        redprint("Replace rope")
        rope_xyz = demo_id_rope_nodes["rope_nodes"][:]
        # Transform rope_nodes from the kinect's frame into the frame of the PR2
        if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][
            ()] != 'r':
            redprint("Transforming rope into frame of robot")
            rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3, :3].T
                                    ) + GlobalVars.init_tfm[:3, 3][None, :]
        rope_nodes = rope_initialization.find_path_through_point_cloud(
            rope_xyz)

        # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results!
        time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env)

        if args.animation:
            sim_env.viewer.Step()

        eval_util.save_task_results_init(args.resultfile, sim_env, i_task,
                                         rope_nodes, args.exec_rope_params)

        for i_step in range(args.num_steps):
            print "task %s step %i" % (i_task, i_step)
            sim_util.reset_arms_to_side(sim_env)

            redprint("Observe point cloud")
            new_xyz = sim_env.sim.observe_cloud()
            state = ("eval_%i" % get_unique_id(), new_xyz)

            redprint("Choosing an action")
            q_values = [
                q_value_fn(state, action, feature_fn, weights, w0)
                for action in actions
            ]
            q_values_root = q_values
            time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env)

            assert args.lookahead_width >= 1, 'Lookahead branches set to zero will fail to select any action'
            agenda = sorted(zip(q_values, actions),
                            key=lambda v: -v[0])[:args.lookahead_width]
            agenda = [
                (v, a, 'depth_0_%i' % i_step, a) for (v, a) in agenda
            ]  # state is (value, most recent action, checkpoint id, root action)
            best_root_action = None
            for depth in range(args.lookahead_depth):
                expansion_results = []
                for (branch, (q, a, chkpt, r_a)) in enumerate(agenda):
                    time_machine.restore_from_checkpoint(
                        chkpt, sim_env,
                        sim_util.get_rope_params(args.lookahead_rope_params))
                    cur_xyz = sim_env.sim.observe_cloud()
                    success, _, _, full_trajs = \
                        compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False)
                    if args.animation:
                        sim_env.viewer.Step()
                    if is_knot(sim_env.sim.rope.GetControlPoints()):
                        best_root_action = r_a
                        break
                    result_cloud = sim_env.sim.observe_cloud()
                    result_chkpt = 'depth_%i_branch_%i_%i' % (depth + 1,
                                                              branch, i_step)
                    if depth != args.lookahead_depth - 1:  # don't save checkpoint at the last depth to save computation time
                        time_machine.set_checkpoint(result_chkpt, sim_env)
                    expansion_results.append(
                        (result_cloud, a, success, result_chkpt, r_a))
                if best_root_action is not None:
                    redprint('Knot Found, stopping search early')
                    break
                agenda = []
                for (cld, incoming_a, success, chkpt,
                     r_a) in expansion_results:
                    if not success:
                        agenda.append((-np.inf, actions[0], chkpt,
                                       r_a))  # TODO why first action?
                        continue
                    next_state = ("eval_%i" % get_unique_id(), cld)
                    q_values = [(q_value_fn(next_state, action, feature_fn,
                                            weights, w0), action, chkpt, r_a)
                                for action in actions]
                    agenda.extend(q_values)
                agenda.sort(key=lambda v: -v[0])
                agenda = agenda[:args.lookahead_width]
                first_root_action = agenda[0][-1]
                if all(r_a == first_root_action for (_, _, _, r_a) in agenda):
                    best_root_action = first_root_action
                    redprint(
                        'All best actions have same root, stopping search early'
                    )
                    break
            if best_root_action is None:
                best_root_action = agenda[0][-1]

            time_machine.restore_from_checkpoint(
                'depth_0_%i' % i_step, sim_env,
                sim_util.get_rope_params(args.exec_rope_params))
            eval_stats = eval_util.EvalStats()
            eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \
                compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive)

            print "BEST ACTION:", best_root_action
            eval_util.save_task_results_step(args.resultfile, sim_env, i_task,
                                             i_step, eval_stats,
                                             best_root_action, full_trajs,
                                             q_values_root)

            if is_knot(sim_env.sim.rope.GetControlPoints()):
                redprint("KNOT TIED!")
                break

        if is_knot(sim_env.sim.rope.GetControlPoints()):
            num_successes += 1
        num_total += 1

        redprint('Eval Successes / Total: ' + str(num_successes) + '/' +
                 str(num_total))
コード例 #4
0
def eval_on_holdout(args, sim_env):
    feature_fn, _, num_features, actions = select_feature_fn(args)
    
    weightfile = h5py.File(args.weightfile, 'r')
    weights = weightfile['weights'][:]
    w0 = weightfile['w0'][()] if 'w0' in weightfile else 0
    weightfile.close()
    assert weights.shape[0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used"
    
    holdoutfile = h5py.File(args.holdoutfile, 'r')
    tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end)
    holdout_items = eval_util.get_holdout_items(holdoutfile, tasks)

    num_successes = 0
    num_total = 0

    for i_task, demo_id_rope_nodes in holdout_items:
        print "task %s" % i_task
        sim_util.reset_arms_to_side(sim_env)
        redprint("Replace rope")
        rope_xyz = demo_id_rope_nodes["rope_nodes"][:]
        # Transform rope_nodes from the kinect's frame into the frame of the PR2
        if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][()] != 'r':
            redprint("Transforming rope into frame of robot")
            rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3,:3].T) + GlobalVars.init_tfm[:3,3][None,:]
        rope_nodes = rope_initialization.find_path_through_point_cloud(rope_xyz)

        # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results!
        time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env)

        if args.animation:
            sim_env.viewer.Step()

        eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, args.exec_rope_params)

        for i_step in range(args.num_steps):
            print "task %s step %i" % (i_task, i_step)
            sim_util.reset_arms_to_side(sim_env)

            redprint("Observe point cloud")
            new_xyz = sim_env.sim.observe_cloud()
            state = ("eval_%i"%get_unique_id(), new_xyz)
            
            redprint("Choosing an action")
            q_values = [q_value_fn(state, action, feature_fn, weights, w0) for action in actions]
            q_values_root = q_values
            time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env)

            assert args.lookahead_width>= 1, 'Lookahead branches set to zero will fail to select any action'
            agenda = sorted(zip(q_values, actions), key = lambda v: -v[0])[:args.lookahead_width]
            agenda = [(v, a, 'depth_0_%i'%i_step, a) for (v, a) in agenda] # state is (value, most recent action, checkpoint id, root action)
            best_root_action = None
            for depth in range(args.lookahead_depth):
                expansion_results = []
                for (branch, (q, a, chkpt, r_a)) in enumerate(agenda):
                    time_machine.restore_from_checkpoint(chkpt, sim_env, sim_util.get_rope_params(args.lookahead_rope_params))
                    cur_xyz = sim_env.sim.observe_cloud()
                    success, _, _, full_trajs = \
                        compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False)
                    if args.animation:
                        sim_env.viewer.Step()
                    if is_knot(sim_env.sim.rope.GetControlPoints()):
                        best_root_action = r_a
                        break
                    result_cloud = sim_env.sim.observe_cloud()
                    result_chkpt = 'depth_%i_branch_%i_%i'%(depth+1, branch, i_step)
                    if depth != args.lookahead_depth-1: # don't save checkpoint at the last depth to save computation time
                        time_machine.set_checkpoint(result_chkpt, sim_env)
                    expansion_results.append((result_cloud, a, success, result_chkpt, r_a))
                if best_root_action is not None:
                    redprint('Knot Found, stopping search early')
                    break
                agenda = []
                for (cld, incoming_a, success, chkpt, r_a) in expansion_results:
                    if not success:
                        agenda.append((-np.inf, actions[0], chkpt, r_a)) # TODO why first action?
                        continue
                    next_state = ("eval_%i"%get_unique_id(), cld)
                    q_values = [(q_value_fn(next_state, action, feature_fn, weights, w0), action, chkpt, r_a) for action in actions]
                    agenda.extend(q_values)
                agenda.sort(key = lambda v: -v[0])
                agenda = agenda[:args.lookahead_width]
                first_root_action = agenda[0][-1]
                if all(r_a == first_root_action for (_, _, _, r_a) in agenda):
                    best_root_action = first_root_action
                    redprint('All best actions have same root, stopping search early')
                    break
            if best_root_action is None:
                best_root_action = agenda[0][-1]
            
            time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(args.exec_rope_params))
            eval_stats = eval_util.EvalStats()
            eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \
                compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive)
            
            print "BEST ACTION:", best_root_action
            eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_root_action, full_trajs, q_values_root)
            
            if is_knot(sim_env.sim.rope.GetControlPoints()):
                redprint("KNOT TIED!")
                break;

        if is_knot(sim_env.sim.rope.GetControlPoints()):
            num_successes += 1
        num_total += 1

        redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))