def replay_on_holdout(args, sim_env): holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) loadresultfile = h5py.File(args.loadresultfile, 'r') loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in loadresult_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_nodes, rope_params, _, _ = eval_util.load_task_results_init(args.loadresultfile, i_task) # uncomment if the results file don't have the right rope nodes #rope_nodes = demo_id_rope_nodes["rope_nodes"][:] if args.replay_rope_params: rope_params = args.replay_rope_params # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, rope_params) for i_step in range(len(loadresultfile[i_task]) - (1 if 'init' in loadresultfile[i_task] else 0)): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() eval_stats = eval_util.EvalStats() best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step(args.loadresultfile, sim_env, i_task, i_step) time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env) time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(rope_params)) eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj(sim_env, new_xyz, GlobalVars.actions[best_action], full_trajs, animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_action replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env) if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm(rots - replay_rots) > 0: yellowprint("The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f" % (np.linalg.norm(trans - replay_trans), np.linalg.norm(rots - replay_rots))) else: yellowprint("Reproducible results OK") eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_action, full_trajs, q_values) if is_knot(sim_env.sim.rope.GetControlPoints()): break; if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def eval_demo_playback(args, sim_env): actionfile = GlobalVars.actions num_successes = 0 num_total = 0 actions_names = [] for action_name in actionfile: actions_names.append(action_name) i = 0 i_task = 0 i_step = 0 while i < len(actions_names): (demo_name, seg_name) = actions_names[i].split('-') if seg_name == 'seg00': if i > 0: if is_knot(sim_env.sim.rope.GetControlPoints()): redprint("KNOT TIED!") num_successes += 1 num_total += 1 i_task += 1 i_step = 0 print "task %s" % demo_name sim_util.reset_arms_to_side(sim_env, floating=True) redprint("Replace rope") rope_xyz = np.asarray(actionfile[actions_names[i]]['cloud_xyz']) rope_nodes = rope_initialization.find_path_through_point_cloud(rope_xyz) # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env, rope_params=sim_util.get_rope_params("thick"), floating=True) if args.animation: sim_env.viewer.Step() print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env, floating=True) redprint("Observe point cloud") #new_xyz = sim_env.sim.observe_cloud(GlobalVars.rope_observe_cloud_upsample) new_xyz = sim_env.sim.observe_cloud2(0.01, GlobalVars.rope_observe_cloud_upsample, 1, 4) state = ("eval_%i"%get_unique_id(), new_xyz) action = actions_names[i] eval_stats = eval_util.EvalStats() eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \ compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[action], animate=args.animation, interactive=args.interactive) i += 1 num_total += 1 redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def replay_on_holdout(args, sim_env): holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) loadresultfile = h5py.File(args.loadresultfile, 'r') loadresult_items = eval_util.get_holdout_items(loadresultfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in loadresult_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_nodes, rope_params, _, _ = eval_util.load_task_results_init( args.loadresultfile, i_task) # uncomment if the results file don't have the right rope nodes #rope_nodes = demo_id_rope_nodes["rope_nodes"][:] if args.replay_rope_params: rope_params = args.replay_rope_params # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, rope_params) for i_step in range( len(loadresultfile[i_task]) - (1 if 'init' in loadresultfile[i_task] else 0)): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() eval_stats = eval_util.EvalStats() best_action, full_trajs, q_values, trans, rots = eval_util.load_task_results_step( args.loadresultfile, sim_env, i_task, i_step) time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env) time_machine.restore_from_checkpoint( 'depth_0_%i' % i_step, sim_env, sim_util.get_rope_params(rope_params)) eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = simulate_demo_traj( sim_env, new_xyz, GlobalVars.actions[best_action], full_trajs, animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_action replay_trans, replay_rots = sim_util.get_rope_transforms(sim_env) if np.linalg.norm(trans - replay_trans) > 0 or np.linalg.norm( rots - replay_rots) > 0: yellowprint( "The rope transforms of the replay rope doesn't match the ones in the original result file by %f and %f" % (np.linalg.norm(trans - replay_trans), np.linalg.norm(rots - replay_rots))) else: yellowprint("Reproducible results OK") eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_action, full_trajs, q_values) if is_knot(sim_env.sim.rope.GetControlPoints()): break if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('REPLAY Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def eval_on_holdout(args, sim_env): feature_fn, _, num_features, actions = select_feature_fn(args) weightfile = h5py.File(args.weightfile, 'r') weights = weightfile['weights'][:] w0 = weightfile['w0'][()] if 'w0' in weightfile else 0 weightfile.close() assert weights.shape[ 0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used" holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) holdout_items = eval_util.get_holdout_items(holdoutfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in holdout_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_xyz = demo_id_rope_nodes["rope_nodes"][:] # Transform rope_nodes from the kinect's frame into the frame of the PR2 if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][ ()] != 'r': redprint("Transforming rope into frame of robot") rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3, :3].T ) + GlobalVars.init_tfm[:3, 3][None, :] rope_nodes = rope_initialization.find_path_through_point_cloud( rope_xyz) # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, args.exec_rope_params) for i_step in range(args.num_steps): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() state = ("eval_%i" % get_unique_id(), new_xyz) redprint("Choosing an action") q_values = [ q_value_fn(state, action, feature_fn, weights, w0) for action in actions ] q_values_root = q_values time_machine.set_checkpoint('depth_0_%i' % i_step, sim_env) assert args.lookahead_width >= 1, 'Lookahead branches set to zero will fail to select any action' agenda = sorted(zip(q_values, actions), key=lambda v: -v[0])[:args.lookahead_width] agenda = [ (v, a, 'depth_0_%i' % i_step, a) for (v, a) in agenda ] # state is (value, most recent action, checkpoint id, root action) best_root_action = None for depth in range(args.lookahead_depth): expansion_results = [] for (branch, (q, a, chkpt, r_a)) in enumerate(agenda): time_machine.restore_from_checkpoint( chkpt, sim_env, sim_util.get_rope_params(args.lookahead_rope_params)) cur_xyz = sim_env.sim.observe_cloud() success, _, _, full_trajs = \ compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False) if args.animation: sim_env.viewer.Step() if is_knot(sim_env.sim.rope.GetControlPoints()): best_root_action = r_a break result_cloud = sim_env.sim.observe_cloud() result_chkpt = 'depth_%i_branch_%i_%i' % (depth + 1, branch, i_step) if depth != args.lookahead_depth - 1: # don't save checkpoint at the last depth to save computation time time_machine.set_checkpoint(result_chkpt, sim_env) expansion_results.append( (result_cloud, a, success, result_chkpt, r_a)) if best_root_action is not None: redprint('Knot Found, stopping search early') break agenda = [] for (cld, incoming_a, success, chkpt, r_a) in expansion_results: if not success: agenda.append((-np.inf, actions[0], chkpt, r_a)) # TODO why first action? continue next_state = ("eval_%i" % get_unique_id(), cld) q_values = [(q_value_fn(next_state, action, feature_fn, weights, w0), action, chkpt, r_a) for action in actions] agenda.extend(q_values) agenda.sort(key=lambda v: -v[0]) agenda = agenda[:args.lookahead_width] first_root_action = agenda[0][-1] if all(r_a == first_root_action for (_, _, _, r_a) in agenda): best_root_action = first_root_action redprint( 'All best actions have same root, stopping search early' ) break if best_root_action is None: best_root_action = agenda[0][-1] time_machine.restore_from_checkpoint( 'depth_0_%i' % i_step, sim_env, sim_util.get_rope_params(args.exec_rope_params)) eval_stats = eval_util.EvalStats() eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \ compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_root_action eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_root_action, full_trajs, q_values_root) if is_knot(sim_env.sim.rope.GetControlPoints()): redprint("KNOT TIED!") break if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def is_knot_features(self, state, action): return np.array([int(is_knot(state[1]))])
def eval_on_holdout(args, sim_env): feature_fn, _, num_features, actions = select_feature_fn(args) weightfile = h5py.File(args.weightfile, 'r') weights = weightfile['weights'][:] w0 = weightfile['w0'][()] if 'w0' in weightfile else 0 weightfile.close() assert weights.shape[0] == num_features, "Dimensions of weights and features don't match. Make sure the right feature is being used" holdoutfile = h5py.File(args.holdoutfile, 'r') tasks = eval_util.get_specified_tasks(args.tasks, args.taskfile, args.i_start, args.i_end) holdout_items = eval_util.get_holdout_items(holdoutfile, tasks) num_successes = 0 num_total = 0 for i_task, demo_id_rope_nodes in holdout_items: print "task %s" % i_task sim_util.reset_arms_to_side(sim_env) redprint("Replace rope") rope_xyz = demo_id_rope_nodes["rope_nodes"][:] # Transform rope_nodes from the kinect's frame into the frame of the PR2 if 'frame' not in demo_id_rope_nodes or demo_id_rope_nodes['frame'][()] != 'r': redprint("Transforming rope into frame of robot") rope_xyz = rope_xyz.dot(GlobalVars.init_tfm[:3,:3].T) + GlobalVars.init_tfm[:3,3][None,:] rope_nodes = rope_initialization.find_path_through_point_cloud(rope_xyz) # don't call replace_rope and sim.settle() directly. use time machine interface for deterministic results! time_machine = sim_util.RopeSimTimeMachine(rope_nodes, sim_env) if args.animation: sim_env.viewer.Step() eval_util.save_task_results_init(args.resultfile, sim_env, i_task, rope_nodes, args.exec_rope_params) for i_step in range(args.num_steps): print "task %s step %i" % (i_task, i_step) sim_util.reset_arms_to_side(sim_env) redprint("Observe point cloud") new_xyz = sim_env.sim.observe_cloud() state = ("eval_%i"%get_unique_id(), new_xyz) redprint("Choosing an action") q_values = [q_value_fn(state, action, feature_fn, weights, w0) for action in actions] q_values_root = q_values time_machine.set_checkpoint('depth_0_%i'%i_step, sim_env) assert args.lookahead_width>= 1, 'Lookahead branches set to zero will fail to select any action' agenda = sorted(zip(q_values, actions), key = lambda v: -v[0])[:args.lookahead_width] agenda = [(v, a, 'depth_0_%i'%i_step, a) for (v, a) in agenda] # state is (value, most recent action, checkpoint id, root action) best_root_action = None for depth in range(args.lookahead_depth): expansion_results = [] for (branch, (q, a, chkpt, r_a)) in enumerate(agenda): time_machine.restore_from_checkpoint(chkpt, sim_env, sim_util.get_rope_params(args.lookahead_rope_params)) cur_xyz = sim_env.sim.observe_cloud() success, _, _, full_trajs = \ compute_trans_traj(sim_env, cur_xyz, GlobalVars.actions[a], animate=args.animation, interactive=False) if args.animation: sim_env.viewer.Step() if is_knot(sim_env.sim.rope.GetControlPoints()): best_root_action = r_a break result_cloud = sim_env.sim.observe_cloud() result_chkpt = 'depth_%i_branch_%i_%i'%(depth+1, branch, i_step) if depth != args.lookahead_depth-1: # don't save checkpoint at the last depth to save computation time time_machine.set_checkpoint(result_chkpt, sim_env) expansion_results.append((result_cloud, a, success, result_chkpt, r_a)) if best_root_action is not None: redprint('Knot Found, stopping search early') break agenda = [] for (cld, incoming_a, success, chkpt, r_a) in expansion_results: if not success: agenda.append((-np.inf, actions[0], chkpt, r_a)) # TODO why first action? continue next_state = ("eval_%i"%get_unique_id(), cld) q_values = [(q_value_fn(next_state, action, feature_fn, weights, w0), action, chkpt, r_a) for action in actions] agenda.extend(q_values) agenda.sort(key = lambda v: -v[0]) agenda = agenda[:args.lookahead_width] first_root_action = agenda[0][-1] if all(r_a == first_root_action for (_, _, _, r_a) in agenda): best_root_action = first_root_action redprint('All best actions have same root, stopping search early') break if best_root_action is None: best_root_action = agenda[0][-1] time_machine.restore_from_checkpoint('depth_0_%i'%i_step, sim_env, sim_util.get_rope_params(args.exec_rope_params)) eval_stats = eval_util.EvalStats() eval_stats.success, eval_stats.feasible, eval_stats.misgrasp, full_trajs = \ compute_trans_traj(sim_env, new_xyz, GlobalVars.actions[best_root_action], animate=args.animation, interactive=args.interactive) print "BEST ACTION:", best_root_action eval_util.save_task_results_step(args.resultfile, sim_env, i_task, i_step, eval_stats, best_root_action, full_trajs, q_values_root) if is_knot(sim_env.sim.rope.GetControlPoints()): redprint("KNOT TIED!") break; if is_knot(sim_env.sim.rope.GetControlPoints()): num_successes += 1 num_total += 1 redprint('Eval Successes / Total: ' + str(num_successes) + '/' + str(num_total))
def loop_body(task_params, demofile, choose_segment, knot, animate, curr_step=None, no_cmat=False): """ Do the body of the main task execution loop (ie. do a segment). Arguments: curr_step is 0 indexed choose_segment is a function that returns the key in the demofile to the segment knot is the knot the rope is checked against new_xyz is the new pointcloud task_params is used for the only_original_segments argument seg_info is of the form: {'parent': The name of the segment from the action file that was chosen, 'hmats': trajectories for the left and right grippers, 'cloud_xyz': the new pointcloud, 'cmat': the correspondence matrix or list of segments} #return {'found_knot': found_knot, 'seg_info': {'segment': segment, 'link2eetraj': link2eetraj, 'new_xyz': new_xyz}} return {'found_knot': found_knot, 'seg_info': seg_info} """ redprint("Acquire point cloud") move_sim_arms_to_side() new_xyz = Globals.sim.observe_cloud(upsample=110) segment = choose_segment(demofile, new_xyz, 7) if segment is None: print "Got no segment while choosing a segment for warping." sys.exit(-1) seg_info = demofile[segment] redprint("Getting warped trajectory...") cmat, warped_ee_traj, miniseg_starts, miniseg_ends, joint_traj = get_warped_trajectory(seg_info, new_xyz, demofile, warp_root=task_params.warp_root, plot=task_params.animate, no_cmat=no_cmat) success = True redprint("executing segment trajectory...") for (i_miniseg, (i_start, i_end)) in enumerate(zip(miniseg_starts, miniseg_ends)): lr_miniseg_traj = {} for lr in 'lr': ee_hmat_traj = warped_ee_traj[lr][:][i_start: i_end + 1] if arm_moved(ee_hmat_traj): lr_miniseg_traj[lr] = ee_hmat_traj yellowprint("\t Executing trajectory for segment %s, part %i using arms '%s'" % (segment, i_miniseg, lr_miniseg_traj.keys())) for lr in 'lr': gripper_open = binarize_gripper(joint_traj[lr][i_start]) prev_gripper_open = binarize_gripper(joint_traj[lr][i_start - 1]) if i_start != 0 else False if not set_gripper_sim(lr, gripper_open, prev_gripper_open, animate): redprint("Grab %s failed" % lr) success = False if not success: break if len(lr_miniseg_traj) > 0: success &= exec_traj_sim(lr_miniseg_traj, animate) if not success: break Globals.sim.settle(animate=animate) rope_nodes = Globals.sim.observe_cloud() found_knot = is_knot(rope_nodes) redprint("Segment %s result: %s" % (segment, success)) seg_info_hash = {'parent': segment, 'hmats': warped_ee_traj, 'cloud_xyz': new_xyz, 'cmat': cmat} return {'found_knot': found_knot, 'seg_info':seg_info_hash}