Example #1
def main(args):
    busybox_data = get_bb_dataset(args.bb_fname, args.N, args.mech_types, 1,
                                  args.urdf_num)
    all_steps = []
    for ix, bb_results in enumerate(busybox_data):
        if args.type == 'gpucb':
            single_dataset, _, steps = create_single_bb_gpucb_dataset(
                bb_results[0],
                '',
                args.plot,
                args,
                ix,
                success_regret=SUCCESS_REGRET)
            all_steps.append(steps)
            print('steps', steps)
        elif args.type == 'random':
            bb = BusyBox.bb_from_result(bb_results[0])
            image_data, gripper = setup_env(bb, args.viz, args.debug)
            regret = float("inf")
            steps = 0
            while regret > SUCCESS_REGRET:
                mech = bb._mechanisms[0]
                # generate either a random or model-based policy and goal configuration
                policy = policies.generate_policy(mech)
                pose_handle_world_init = mech.get_handle_pose()

                # calculate trajectory
                pose_handle_base_world = mech.get_pose_handle_base_world()
                traj = policy.generate_trajectory(pose_handle_base_world,
                                                  args.debug,
                                                  color=[0, 0, 1])

                # execute trajectory
                cumu_motion, net_motion, pose_handle_world_final = \
                        gripper.execute_trajectory(traj, mech, policy.type, args.debug)

                # calc regret
                max_dist = mech.get_max_net_motion()
                regret = (max_dist - net_motion) / max_dist
                steps += 1

                # reset
                gripper.reset(mech)
            p.disconnect()
            all_steps.append(steps)
            print('steps', steps)

    # Save the dataset.
    util.write_to_file(args.fname, all_steps)
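The random baseline above declares success once the normalized regret drops below SUCCESS_REGRET. A minimal, self-contained sketch of that stopping criterion (the helper name and the threshold value are illustrative, not taken from the source):

# Sketch of the normalized-regret stopping test used by the random baseline.
# SUCCESS_REGRET's real value lives in the surrounding module; 0.05 is assumed.
SUCCESS_REGRET = 0.05

def normalized_regret(max_dist, net_motion):
    # fraction of the mechanism's achievable motion the policy failed to produce
    return (max_dist - net_motion) / max_dist

# an interaction counts as a success once regret falls below the threshold
assert normalized_regret(max_dist=0.10, net_motion=0.098) < SUCCESS_REGRET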
Example #2
def replay_result(result):
    from gen.generator_busybox import BusyBox
    from utils.setup_pybullet import setup_env
    from actions.policies import get_policy_from_tuple

    bb = BusyBox.bb_from_result(result)
    image_data, gripper = setup_env(bb, True, True)
    mech = bb._mechanisms[0]
    policy = get_policy_from_tuple(result.policy_params)
    pose_handle_base_world = mech.get_pose_handle_base_world()
    traj = policy.generate_trajectory(pose_handle_base_world, True)
    cumu_motion, net_motion, pose_handle_world_final = gripper.execute_trajectory(
        traj, mech, policy.type, True)
    import pdb
    pdb.set_trace()
    p.disconnect()
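replay_result takes a single saved result record. A plausible driver, assuming util exposes a read_from_file counterpart to the write_to_file call in Example #1 (the module path, reader name, and file name are all assumptions):

from utils import util  # assumed path, mirroring utils.setup_pybullet above

results = util.read_from_file('random_results.pickle')  # assumed reader/file
replay_result(results[0])  # re-executes one recorded interaction, then drops into pdb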
Example #3
def plot_test_set():
    # plot test data angles and positions
    for (i, point) in enumerate(test_data[:10]):
        bb = BusyBox.bb_from_result(point)
        if p.getConnectionInfo()['isConnected']:
            p.disconnect()
        setup_pybullet.setup_env(bb, False, False)
        mech = bb._mechanisms[0]
        true_policy = policies.generate_policy(mech, False)
        pos = (true_policy.rigid_position[0],
               true_policy.rigid_position[2])
        pos_ax.plot(*pos, 'm.')
        pos_ax.annotate(i, pos)
        # label only the last plotted point so the legend gets a single entry
        if i == len(test_data[:10]) - 1:
            angle_ax.plot(true_policy.pitch, 2.0, 'k.', label='test data')
        else:
            angle_ax.plot(true_policy.pitch, 2.0, 'k.')
        angle_ax.annotate(i, (true_policy.pitch, 2.0))
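plot_test_set tears down any live physics server before calling setup_env on each point. That guard can be isolated as a small helper (a sketch; getConnectionInfo, disconnect, and connect are real pybullet calls, the wrapper itself is illustrative):

import pybullet as p

def ensure_fresh_client():
    # pybullet keeps one default client; drop it before reconnecting
    if p.getConnectionInfo()['isConnected']:
        p.disconnect()
    p.connect(p.DIRECT)  # headless; setup_env(bb, False, False) presumably connects similarly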
Example #4
def generate_dataset(args, git_hash):
    bb_dataset = get_bb_dataset(args.bb_fname, args.n_bbs, args.mech_types, args.max_mech, args.urdf_num)
    if args.n_samples == 0:
        return bb_dataset

    results = []
    for (i, bb_results) in enumerate(bb_dataset):
        bb = BusyBox.bb_from_result(bb_results[0])
        image_data, gripper = setup_env(bb, args.viz, args.debug)
        bb_results = []  # reuse the name: from here on it collects this busybox's new results
        for j in range(args.n_samples):
            sys.stdout.write("\rProcessing sample %i/%i for busybox %i/%i" % (j+1, args.n_samples, i+1, args.n_bbs))
            for mech in bb._mechanisms:
                # generate either a random or model-based policy and goal configuration
                policy = policies.generate_policy(mech)
                pose_handle_world_init = mech.get_handle_pose()

                # calculate trajectory
                pose_handle_base_world = mech.get_pose_handle_base_world()
                traj = policy.generate_trajectory(pose_handle_base_world, args.debug, color=[0, 0, 1])

                # execute trajectory
                cumu_motion, net_motion, pose_handle_world_final = \
                        gripper.execute_trajectory(traj, mech, policy.type, args.debug)
                # save result data
                policy_params = policy.get_policy_tuple()
                mechanism_params = mech.get_mechanism_tuple()
                result = util.Result(policy_params, mechanism_params, net_motion,
                                     cumu_motion, pose_handle_world_init,
                                     pose_handle_world_final, image_data, git_hash)
                bb_results.append(result)

                gripper.reset(mech)
        results.append(bb_results)
        p.disconnect()
    print()
    return results
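generate_dataset returns one sub-list of util.Result records per busybox (n_samples interactions times the number of mechanisms). A flat view for downstream use is a one-liner (illustrative, assuming the nested structure built above):

# flatten the per-busybox result lists returned by generate_dataset
flat = [res for bb_results in results for res in bb_results]
# len(flat) == n_bbs * n_samples * mechanisms_per_bb when every run completes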
Example #5
def create_single_bb_gpucb_dataset(bb_result, nn_fname, plot, args, bb_i,
                                   n_interactions=None, plot_dir_prefix='',
                                   ret_regret=False, success_regret=None):
    use_cuda = False
    dataset = []
    viz = False
    debug = False
    # interact with BB
    bb = BusyBox.bb_from_result(bb_result, urdf_num=args.urdf_num)
    mech = bb._mechanisms[0]
    image_data, gripper = setup_env(bb, viz, debug)

    pose_handle_base_world = mech.get_pose_handle_base_world()
    sampler = UCB_Interaction(bb, image_data, plot, args, nn_fname=nn_fname)
    for ix in itertools.count():
        gripper.reset(mech)
        if args.debug:
            sys.stdout.write('\rProcessing sample %i' % ix)
        # if we are done sampling n_interactions OR we need to get regret after
        # each interaction
        if ((n_interactions is not None) and ix == n_interactions) or \
                (success_regret is not None):

            regret, start_x, stop_x, policy_type = test_model(sampler, args, gripper=gripper)
            gripper.reset(mech)

            #print('Current regret', regret)
            opt_points = (policy_type, [(start_x, 'g'), (stop_x, 'r')])
            sample_points = {'Prismatic': [(sample, 'k') for sample in sampler.xs['Prismatic']],
                            'Revolute': [(sample, 'k') for sample in sampler.xs['Revolute']]}

            # if done sampling n_interactions
            if (n_interactions is not None) and ix == n_interactions:
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                    plt.show()
                    input('Enter to close')
                    plt.close()
                if ret_regret:
                    return dataset, sampler.gps, regret
                else:
                    return dataset, sampler.gps
            # if got successful interaction or timeout
            elif (success_regret is not None) and \
                    ((regret < success_regret) or (ix >= 100)):
                if ix >= 100:
                    print('interaction timeout')
                elif regret < success_regret:
                    print('successful interaction!')
                if plot:
                    viz_circles(util.GP_PLOT,
                                image_data,
                                mech,
                                BETA,
                                sample_points=sample_points,
                                opt_points=opt_points,
                                gps=sampler.gps,
                                nn=sampler.nn,
                                bb_i=bb_i,
                                plot_dir_prefix=plot_dir_prefix)
                return dataset, sampler.gps, ix

        # sample a policy
        # image_data, gripper = setup_env(bb, False, debug)

        gripper.reset(mech)
        x, policy = sampler.sample()

        # execute
        traj = policy.generate_trajectory(pose_handle_base_world, debug=debug)
        c_motion, motion, handle_pose_final = gripper.execute_trajectory(traj, mech, policy.type, False)
        result = util.Result(policy.get_policy_tuple(), mech.get_mechanism_tuple(),
                             motion, c_motion, handle_pose_final, handle_pose_final,
                             image_data, None)  # note: the init-pose slot also gets the final pose
        dataset.append(result)

        # update GP
        sampler.update(result, x)
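create_single_bb_gpucb_dataset is a GP-UCB active-learning loop: sample the acquisition maximizer, execute it, update the model. A generic sketch of the UCB acquisition the sampler presumably implements (all names here are illustrative; in the source, UCB_Interaction.sample encapsulates this and BETA plays the role of beta):

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

def ucb_acquisition(gp: GaussianProcessRegressor, candidates, beta):
    # GP-UCB: pick the candidate maximizing mean + sqrt(beta) * std
    mu, sigma = gp.predict(candidates, return_std=True)
    return candidates[np.argmax(mu + np.sqrt(beta) * sigma)]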
Example #6
def viz_train_test_data(train_data, test_data):
    import matplotlib.pyplot as plt
    from gen.generator_busybox import BusyBox

    n_inter_per_bb = 100
    n_bbs = int(len(train_data) / n_inter_per_bb)
    training_dataset_size = n_bbs  #10

    plt.ion()
    angle_fig, angle_ax = plt.subplots()
    pos_fig, pos_ax = plt.subplots()

    def plot_test_set():
        # plot test data angles and positions
        for (i, point) in enumerate(test_data[:10]):
            bb = BusyBox.bb_from_result(point)
            if p.getConnectionInfo()['isConnected']:
                p.disconnect()
            setup_pybullet.setup_env(bb, False, False)
            mech = bb._mechanisms[0]
            true_policy = policies.generate_policy(mech, False)
            pos = (true_policy.rigid_position[0],
                   true_policy.rigid_position[2])
            pos_ax.plot(*pos, 'm.')
            pos_ax.annotate(i, pos)
            # label only the last plotted point so the legend gets a single entry
            if i == len(test_data[:10]) - 1:
                angle_ax.plot(true_policy.pitch, 2.0, 'k.', label='test data')
            else:
                angle_ax.plot(true_policy.pitch, 2.0, 'k.')
            angle_ax.annotate(i, (true_policy.pitch, 2.0))

    dataset_sizes = list(
        range(training_dataset_size, n_bbs + 1, training_dataset_size))
    pitches = []
    # plot training data angles and positions
    for n in range(1, n_bbs + 1):
        point = train_data[(n - 1) * n_inter_per_bb]
        if p.getConnectionInfo()['isConnected']:
            p.disconnect()
        bb = BusyBox.bb_from_result(point)
        setup_pybullet.setup_env(bb, False, False)
        mech = bb._mechanisms[0]
        true_policy = policies.generate_policy(mech, False)
        pitches += [true_policy.pitch]
        pos = (true_policy.rigid_position[0], true_policy.rigid_position[2])
        pos_ax.plot(*pos, 'c.')

        if n in dataset_sizes:
            angle_ax.clear()
            pos_ax.set_title('Positions of Training and Test Data, n_bbs=' +
                             str(n))
            pos_fig.savefig('dataset_imgs/poss_n_bbs' + str(n))

            angle_ax.set_title('Histogram of Training Data Angle, n_bbs=' +
                               str(n))
            angle_ax.hist(pitches, 30)
            plot_test_set()
            #angle_fig.savefig('dataset_imgs/pitches_n_bbs'+str(n))
            plt.show()
            input('enter to close')

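viz_train_test_data assumes train_data is flat, with n_inter_per_bb consecutive interactions per busybox, so train_data[(n - 1) * n_inter_per_bb] is the first record of the n-th busybox. A self-contained check of that indexing (stand-in data, not the real Result records):

# stand-in for a flat interaction list: 5 busyboxes x 100 interactions each
n_inter_per_bb = 100
train_data = list(range(5 * n_inter_per_bb))
first_records = train_data[::n_inter_per_bb]  # first interaction of each busybox
assert first_records == [k * n_inter_per_bb for k in range(5)]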