def replay_on_holdout(args, sim_env): holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) loadresultfile = h5py.File(args.loadresultfile, 'r') loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in loadresult_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_nodes, rope_params, _, _ = eval_util.load_task_results_init(args.loadresultfile, i_task) # uncomment if the results file don't have the right rope nodes #rope_nodes = demo_id_rope_nodes["rope_nodes"][:] if args.replay_rope_params: rope_params = args.replay_rope_params # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, rope_params) for i_step in range(len(loadresultfile[i_task]) - (1 if 'init' in loadresultfile[i_task] else 0)): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() eval_stats = eval_util.EvalStats() best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step(args.loadresultfile, sim_env, i_task, i_step) time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env) time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(rope_params)) eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj(sim_env, new_xyz, GlobalVars.actions[best_action], full_trajs, animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_action replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env) if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm(rots - replay_rots) > 0: yellowprint("The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f" % (np.linalg.norm(trans - replay_trans), np.linalg.norm(rots - replay_rots))) else: yellowprint("Reproducible results OK") eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_action, full_trajs, q_values) if is_knot(sim_env.sim.rope.GetControlPoints()): break; if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def replay_on_holdout(args, sim_env): holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) loadresultfile = h5py.File(args.loadresultfile, 'r') loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in loadresult_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_nodes, rope_params, _, _ = eval_util.load_task_results_init( args.loadresultfile, i_task) # uncomment if the results file don't have the right rope nodes #rope_nodes = demo_id_rope_nodes["rope_nodes"][:] if args.replay_rope_params: rope_params = args.replay_rope_params # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, rope_params) for i_step in range( len(loadresultfile[i_task]) - (1 if 'init' in loadresultfile[i_task] else 0)): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() eval_stats = eval_util.EvalStats() best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step( args.loadresultfile, sim_env, i_task, i_step) time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env) time_machine.restore_from_checkpoint( 'depth_0_%i' % i_step, sim_env, sim_util.get_rope_params(rope_params)) eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj( sim_env, new_xyz, GlobalVars.actions[best_action], full_trajs, animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_action replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env) if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm( rots - replay_rots) > 0: yellowprint( "The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f" % (np.linalg.norm(trans - replay_trans), np.linalg.norm(rots - replay_rots))) else: yellowprint("Reproducible results OK") eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_action, full_trajs, q_values) if is_knot(sim_env.sim.rope.GetControlPoints()): break if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def eval_on_holdout(args, sim_env): feature_fn, _, num_features, actions = select_feature_fn(args) weightfile = h5py.File(args.weightfile, 'r') weights = weightfile['weights'][:] w0 = weightfile['w0'][()] if 'w0' in weightfile else 0 weightfile.close() assert weights.shape[ 0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used" holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) holdout_items = eval_util.get_holdout_items(holdoutfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in holdout_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_xyz = demo_id_rope_nodes["rope_nodes"][:] # Transform rope_nodes from the kinect's frame into the frame of the PR2 if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][ ()] != 'r': redprint("Transforming rope into frame of robot") rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3, :3].T ) + GlobalVars.init_tfm[:3, 3][None, :] rope_nodes = rope_initialization.find_path_through_point_cloud( rope_xyz) # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, args.exec_rope_params) for i_step in range(args.num_steps): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() state = ("eval_%i" % get_unique_id(), new_xyz) redprint("Choosing an action") q_values = [ q_value_fn(state, action, feature_fn, weights, w0) for action in actions ] q_values_root = q_values time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env) assert args.lookahead_width >= 1, 'Lookahead branches set to zero will fail to select any action' agenda = sorted(zip(q_values, actions), key=lambda v: -v[0])[:args.lookahead_width] agenda = [ (v, a, 'depth_0_%i' % i_step, a) for (v, a) in agenda ] # state is (value, most recent action, checkpoint id, root action) best_root_action = None for depth in range(args.lookahead_depth): expansion_results = [] for (branch, (q, a, chkpt, r_a)) in enumerate(agenda): time_machine.restore_from_checkpoint( chkpt, sim_env, sim_util.get_rope_params(args.lookahead_rope_params)) cur_xyz = sim_env.sim.observe_cloud() success, _, _, full_trajs = \ compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False) if args.animation: sim_env.viewer.Step() if is_knot(sim_env.sim.rope.GetControlPoints()): best_root_action = r_a break result_cloud = sim_env.sim.observe_cloud() result_chkpt = 'depth_%i_branch_%i_%i' % (depth + 1, branch, i_step) if depth != args.lookahead_depth - 1: # don't save checkpoint at the last depth to save computation time time_machine.set_checkpoint(result_chkpt, sim_env) expansion_results.append( (result_cloud, a, success, result_chkpt, r_a)) if best_root_action is not None: redprint('Knot Found, stopping search early') break agenda = [] for (cld, incoming_a, success, chkpt, r_a) in expansion_results: if not success: agenda.append((-np.inf, actions[0], chkpt, r_a)) # TODO why first action? continue next_state = ("eval_%i" % get_unique_id(), cld) q_values = [(q_value_fn(next_state, action, feature_fn, weights, w0), action, chkpt, r_a) for action in actions] agenda.extend(q_values) agenda.sort(key=lambda v: -v[0]) agenda = agenda[:args.lookahead_width] first_root_action = agenda[0][-1] if all(r_a == first_root_action for (_, _, _, r_a) in agenda): best_root_action = first_root_action redprint( 'All best actions have same root, stopping search early' ) break if best_root_action is None: best_root_action = agenda[0][-1] time_machine.restore_from_checkpoint( 'depth_0_%i' % i_step, sim_env, sim_util.get_rope_params(args.exec_rope_params)) eval_stats = eval_util.EvalStats() eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \ compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_root_action eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_root_action, full_trajs, q_values_root) if is_knot(sim_env.sim.rope.GetControlPoints()): redprint("KNOT TIED!") break if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def eval_on_holdout(args, sim_env): feature_fn, _, num_features, actions = select_feature_fn(args) weightfile = h5py.File(args.weightfile, 'r') weights = weightfile['weights'][:] w0 = weightfile['w0'][()] if 'w0' in weightfile else 0 weightfile.close() assert weights.shape[0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used" holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) holdout_items = eval_util.get_holdout_items(holdoutfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in holdout_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_xyz = demo_id_rope_nodes["rope_nodes"][:] # Transform rope_nodes from the kinect's frame into the frame of the PR2 if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][()] != 'r': redprint("Transforming rope into frame of robot") rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3,:3].T) + GlobalVars.init_tfm[:3,3][None,:] rope_nodes = rope_initialization.find_path_through_point_cloud(rope_xyz) # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, args.exec_rope_params) for i_step in range(args.num_steps): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() state = ("eval_%i"%get_unique_id(), new_xyz) redprint("Choosing an action") q_values = [q_value_fn(state, action, feature_fn, weights, w0) for action in actions] q_values_root = q_values time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env) assert args.lookahead_width>= 1, 'Lookahead branches set to zero will fail to select any action' agenda = sorted(zip(q_values, actions), key = lambda v: -v[0])[:args.lookahead_width] agenda = [(v, a, 'depth_0_%i'%i_step, a) for (v, a) in agenda] # state is (value, most recent action, checkpoint id, root action) best_root_action = None for depth in range(args.lookahead_depth): expansion_results = [] for (branch, (q, a, chkpt, r_a)) in enumerate(agenda): time_machine.restore_from_checkpoint(chkpt, sim_env, sim_util.get_rope_params(args.lookahead_rope_params)) cur_xyz = sim_env.sim.observe_cloud() success, _, _, full_trajs = \ compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False) if args.animation: sim_env.viewer.Step() if is_knot(sim_env.sim.rope.GetControlPoints()): best_root_action = r_a break result_cloud = sim_env.sim.observe_cloud() result_chkpt = 'depth_%i_branch_%i_%i'%(depth+1, branch, i_step) if depth != args.lookahead_depth-1: # don't save checkpoint at the last depth to save computation time time_machine.set_checkpoint(result_chkpt, sim_env) expansion_results.append((result_cloud, a, success, result_chkpt, r_a)) if best_root_action is not None: redprint('Knot Found, stopping search early') break agenda = [] for (cld, incoming_a, success, chkpt, r_a) in expansion_results: if not success: agenda.append((-np.inf, actions[0], chkpt, r_a)) # TODO why first action? continue next_state = ("eval_%i"%get_unique_id(), cld) q_values = [(q_value_fn(next_state, action, feature_fn, weights, w0), action, chkpt, r_a) for action in actions] agenda.extend(q_values) agenda.sort(key = lambda v: -v[0]) agenda = agenda[:args.lookahead_width] first_root_action = agenda[0][-1] if all(r_a == first_root_action for (_, _, _, r_a) in agenda): best_root_action = first_root_action redprint('All best actions have same root, stopping search early') break if best_root_action is None: best_root_action = agenda[0][-1] time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(args.exec_rope_params)) eval_stats = eval_util.EvalStats() eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \ compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_root_action eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_root_action, full_trajs, q_values_root) if is_knot(sim_env.sim.rope.GetControlPoints()): redprint("KNOT TIED!") break; if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))