Example #1
def test_model(sampler, args, gripper=None):
    """
    Maximize the GP mean function to get the best policy.
    :param sampler: A GP fit to the current BusyBox.
    :return: Regret.
    """
    # Optimize the GP to get the best policy.
    ucb = False
    stop_x, stop_policy, start_x = sampler.optim.optimize_gp(ucb)

    # Execute the policy and observe the true motion.
    debug = False
    viz = False
    if gripper is None:
        _, gripper = setup_env(sampler.bb, viz, debug)
    else:
        gripper.reset(sampler.mech)
    pose_handle_base_world = sampler.mech.get_pose_handle_base_world()
    traj = stop_policy.generate_trajectory(pose_handle_base_world, debug=debug)
    cmotion, motion, _ = gripper.execute_trajectory(traj, sampler.mech, stop_policy.type, debug=debug)

    # Calculate the regret.
    max_d = sampler.mech.get_max_net_motion()
    regret = (max_d - motion) / max_d

    return regret, start_x, stop_x, stop_policy.type
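
A minimal driver sketch for test_model, assuming a sampler that has already been fit via UCB_Interaction (see Example #7) and the experiment's argparse namespace; the variable names here are illustrative:

# Sketch: evaluate the best policy found so far (sampler and args assumed to exist).
regret, start_x, stop_x, policy_type = test_model(sampler, args)
print('regret: %.3f, policy type: %s' % (regret, policy_type))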
Example #2
def main(args):
    busybox_data = get_bb_dataset(args.bb_fname, args.N, args.mech_types, 1,
                                  args.urdf_num)
    all_steps = []
    for ix, bb_results in enumerate(busybox_data):
        if args.type == 'gpucb':
            single_dataset, _, steps = create_single_bb_gpucb_dataset(
                bb_results[0],
                '',
                args.plot,
                args,
                ix,
                success_regret=SUCCESS_REGRET)
            all_steps.append(steps)
            print('steps', steps)
        elif args.type == 'random':
            bb = BusyBox.bb_from_result(bb_results[0])
            image_data, gripper = setup_env(bb, args.viz, args.debug)
            regret = float("inf")
            steps = 0
            while regret > SUCCESS_REGRET:
                mech = bb._mechanisms[0]
                # generate a random policy and goal configuration
                policy = policies.generate_policy(mech)
                pose_handle_world_init = mech.get_handle_pose()

                # calculate trajectory
                pose_handle_base_world = mech.get_pose_handle_base_world()
                traj = policy.generate_trajectory(pose_handle_base_world,
                                                  args.debug,
                                                  color=[0, 0, 1])

                # execute trajectory
                cumu_motion, net_motion, pose_handle_world_final = \
                        gripper.execute_trajectory(traj, mech, policy.type, args.debug)

                # calc regret
                max_dist = mech.get_max_net_motion()
                regret = (max_dist - net_motion) / max_dist
                steps += 1

                # reset
                gripper.reset(mech)
            p.disconnect()
            all_steps.append(steps)
            print('steps', steps)

    # Save the dataset.
    util.write_to_file(args.fname, all_steps)
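
main(args) reads its configuration from an argparse namespace (bb_fname, N, mech_types, urdf_num, type, plot, viz, debug, fname). A minimal parser sketch; the defaults and flag spellings are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--bb-fname', default='')  # '' generates fresh BusyBoxes
parser.add_argument('--N', type=int, default=10)  # number of BusyBoxes
parser.add_argument('--mech-types', nargs='+', default=['slider'])
parser.add_argument('--urdf-num', type=int, default=0)
parser.add_argument('--type', choices=['gpucb', 'random'], default='gpucb')
parser.add_argument('--fname', default='steps.pickle')  # assumed output filename
parser.add_argument('--plot', action='store_true')
parser.add_argument('--viz', action='store_true')
parser.add_argument('--debug', action='store_true')
main(parser.parse_args())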
Example #3
def replay_result(result):
    from gen.generator_busybox import BusyBox
    from utils.setup_pybullet import setup_env
    from actions.policies import get_policy_from_tuple

    bb = BusyBox.bb_from_result(result)
    image_data, gripper = setup_env(bb, True, True)
    mech = bb._mechanisms[0]
    policy = get_policy_from_tuple(result.policy_params)
    pose_handle_base_world = mech.get_pose_handle_base_world()
    traj = policy.generate_trajectory(pose_handle_base_world, True)
    cumu_motion, net_motion, pose_handle_world_final = gripper.execute_trajectory(
        traj, mech, policy.type, True)
    # Drop into the debugger so the final scene can be inspected interactively.
    import pdb
    pdb.set_trace()
    p.disconnect()
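
replay_result re-runs a single stored interaction with visualization and debugging enabled. A usage sketch, assuming results were saved the way generate_dataset (below) produces them, i.e. a list of per-BusyBox result lists; the filename is hypothetical:

results = read_from_file('results.pickle')  # hypothetical filename
replay_result(results[0][0])  # replay the first interaction on the first BusyBox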
def get_true_ys(X_pred, mech, policy_params):
    Y_pred = np.zeros((X_pred.shape[0]))
    viz = False
    debug = False
    for i, x in enumerate(X_pred):
        policy = policies.get_policy_from_x(mech, x, policy_params)
        width, height = 0.6, 0.6
        bb = BusyBox.get_busybox(width, height, [mech])
        _, gripper = setup_env(bb, viz, debug)

        # calculate trajectory
        pose_handle_base_world = mech.get_pose_handle_base_world()
        traj = policy.generate_trajectory(pose_handle_base_world, debug)

        # execute trajectory
        _, net_motion, _ = gripper.execute_trajectory(traj, mech, policy.type, False)
        Y_pred[i] = net_motion
        gripper.reset(mech)
    return Y_pred
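
get_true_ys queries the simulator for the ground-truth net motion of each policy parameter vector in X_pred, e.g. to compare against GP predictions. A sketch, where the grid shape and the origin of mech and policy_params are assumptions:

import numpy as np

X_pred = np.linspace(-0.1, 0.1, 20).reshape(-1, 1)  # hypothetical 1-D parameter grid
Y_true = get_true_ys(X_pred, mech, policy_params)  # one net-motion value per row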
def generate_dataset(args, git_hash):
    bb_dataset = get_bb_dataset(args.bb_fname, args.n_bbs, args.mech_types, args.max_mech, args.urdf_num)
    if args.n_samples == 0:
        return bb_dataset

    results = []
    for (i, bb_results) in enumerate(bb_dataset):
        bb = BusyBox.bb_from_result(bb_results[0])
        image_data, gripper = setup_env(bb, args.viz, args.debug)
        bb_samples = []  # results collected for this BusyBox
        for j in range(args.n_samples):
            sys.stdout.write("\rProcessing sample %i/%i for busybox %i/%i" % (j+1, args.n_samples, i+1, args.n_bbs))
            sys.stdout.flush()
            for mech in bb._mechanisms:
                # generate either a random or model-based policy and goal configuration
                policy = policies.generate_policy(mech)
                pose_handle_world_init = mech.get_handle_pose()

                # calculate trajectory
                pose_handle_base_world = mech.get_pose_handle_base_world()
                traj = policy.generate_trajectory(pose_handle_base_world, args.debug, color=[0, 0, 1])

                # execute trajectory
                cumu_motion, net_motion, pose_handle_world_final = \
                        gripper.execute_trajectory(traj, mech, policy.type, args.debug)
                # save result data
                policy_params = policy.get_policy_tuple()
                mechanism_params = mech.get_mechanism_tuple()
                result = util.Result(policy_params, mechanism_params, net_motion,
                                     cumu_motion, pose_handle_world_init, pose_handle_world_final,
                                     image_data, git_hash)
                bb_samples.append(result)

                gripper.reset(mech)
        results.append(bb_samples)
        p.disconnect()
    print()
    return results
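
generate_dataset stamps every Result with a git hash so datasets can be traced back to the code that produced them. One common way to supply it (an assumption, not shown in the source) is to ask git directly:

import subprocess

git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
results = generate_dataset(args, git_hash)
util.write_to_file(args.fname, results)  # persist, as in Example #2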
def get_bb_dataset(bb_fname, n_bbs, mech_types, max_mech, urdf_num):
    # Create a dataset of busyboxes.
    if bb_fname == '' or bb_fname is None:
        print('Creating Busyboxes.')
        mech_classes = []
        for mech_type in mech_types:
            if mech_type == 'slider':
                mech_classes.append(Slider)
            elif mech_type == 'door':
                mech_classes.append(Door)

        bb_dataset = []
        for _ in range(n_bbs):
            # TODO: I think there is a bug here...
            bb = BusyBox.generate_random_busybox(max_mech=max_mech,
                                                 mech_types=mech_classes,
                                                 urdf_tag=urdf_num)
            mechanism_params = bb._mechanisms[0].get_mechanism_tuple()
            image_data, gripper = setup_env(bb, False, False)
            bb_dataset.append([util.Result(None, mechanism_params, None, None, None,
                                           None, image_data, None)])
        print('BusyBoxes created.')
    else:
        # Load in a file with predetermined BusyBoxes.
        bb_dataset = read_from_file(bb_fname)[:n_bbs]
    return bb_dataset
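
Passing an empty (or None) bb_fname makes get_bb_dataset generate fresh BusyBoxes; otherwise it loads the first n_bbs predetermined ones from file. A quick sketch:

# Generate five fresh single-mechanism slider BusyBoxes (no file given).
bb_dataset = get_bb_dataset('', 5, ['slider'], max_mech=1, urdf_num=0)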
Example #7
def create_single_bb_gpucb_dataset(bb_result, nn_fname, plot, args, bb_i,
                                   n_interactions=None, plot_dir_prefix='',
                                   ret_regret=False, success_regret=None):
    use_cuda = False
    dataset = []
    viz = False
    debug = False
    # interact with BB
    bb = BusyBox.bb_from_result(bb_result, urdf_num=args.urdf_num)
    mech = bb._mechanisms[0]
    image_data, gripper = setup_env(bb, viz, debug)

    pose_handle_base_world = mech.get_pose_handle_base_world()
    sampler = UCB_Interaction(bb, image_data, plot, args, nn_fname=nn_fname)
    for ix in itertools.count():
        gripper.reset(mech)
        if args.debug:
            sys.stdout.write('\rProcessing sample %i' % ix)
        # if we are done sampling n_interactions OR we need to get regret after
        # each interaction
        if (n_interactions is not None and ix == n_interactions) or \
                (success_regret is not None):

            regret, start_x, stop_x, policy_type = test_model(sampler, args, gripper=gripper)
            gripper.reset(mech)

            #print('Current regret', regret)
            opt_points = (policy_type, [(start_x, 'g'), (stop_x, 'r')])
            sample_points = {'Prismatic': [(sample, 'k') for sample in sampler.xs['Prismatic']],
                             'Revolute': [(sample, 'k') for sample in sampler.xs['Revolute']]}

            # if done sampling n_interactions
            if n_interactions is not None and ix == n_interactions:
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                    plt.show()
                    input('Enter to close')
                    plt.close()
                if ret_regret:
                    return dataset, sampler.gps, regret
                else:
                    return dataset, sampler.gps
            # if we got a successful interaction or timed out
            elif success_regret is not None and \
                    (regret < success_regret or ix >= 100):
                if ix >= 100:
                    print('timeout interactions')
                elif regret < success_regret:
                    print('successful interaction!')
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                return dataset, sampler.gps, ix

        # sample a policy
        # image_data, gripper = setup_env(bb, False, debug)

        gripper.reset(mech)
        x, policy = sampler.sample()

        # execute; record the handle pose before execution so the Result stores
        # both the initial and final handle poses
        pose_handle_world_init = mech.get_handle_pose()
        traj = policy.generate_trajectory(pose_handle_base_world, debug=debug)
        c_motion, motion, handle_pose_final = gripper.execute_trajectory(traj, mech, policy.type, False)
        result = util.Result(policy.get_policy_tuple(), mech.get_mechanism_tuple(),
                             motion, c_motion, pose_handle_world_init, handle_pose_final,
                             image_data, None)
        dataset.append(result)

        # update GP
        sampler.update(result, x)
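
create_single_bb_gpucb_dataset has two exit modes: stop after a fixed budget of n_interactions (returning the dataset and GPs, plus the regret if ret_regret is set), or run until the regret drops below success_regret or a 100-step timeout hits (returning the step count as well, as in Example #2's gpucb branch). A sketch of both call patterns; the threshold value is an assumption:

# Fixed interaction budget.
dataset, gps = create_single_bb_gpucb_dataset(bb_result, '', False, args, 0,
                                              n_interactions=10)

# Run until success or timeout.
dataset, gps, steps = create_single_bb_gpucb_dataset(bb_result, '', False, args, 0,
                                                     success_regret=0.05)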