Example #1
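# Fragment: the end of a labelling routine (building its result object) plus the
# command-line entry point that labels every object in a dataset chunk. It assumes
# module-level imports (logging, os, the experiment-config module ec, the database
# module db, and label_correlated itself) that are not part of this excerpt.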
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedExperimentResult(avg_ua_rewards, avg_ts_rewards, avg_ts_corr_rewards,
                                            estimated_pfc, ua_result.iters, kernel_matrix, obj_key=obj.key)

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('config', nargs='?', default='cfg/correlated.yaml',
                        help='path to the experiment configuration file')
    parser.add_argument('output_dest', nargs='?', default='out/',
                        help='directory to write results to')
    args = parser.parse_args()

    logging.getLogger().setLevel(logging.INFO)

    # read config file
    config = ec.ExperimentConfig(args.config)
    chunk = db.Chunk(config)

    # make output directory
    dest = os.path.join(args.output_dest, chunk.name)
    try:
        os.makedirs(dest)
    except os.error:
        pass

    # loop through objects, labelling each
    results = []
    avg_experiment_result = None
    for obj in chunk:
        logging.info('Labelling object {}'.format(obj.key))
        experiment_result = label_correlated(obj, chunk, dest, config)
        if experiment_result is None:
            continue  # labelling failed for this object; skip it
        results.append(experiment_result)
Example #2
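# This example assumes the surrounding module's imports, e.g. numpy as np,
# scipy.stats, scipy.spatial.distance as ssd, matplotlib.pyplot as plt,
# logging, time, IPython, the project modules referenced below (db, ags, gs,
# ff, pfc, das, tc, kernels, objectives, models), and helpers such as
# load_grasps, save_grasps, reward_vs_iters, BanditCorrelatedExperimentResult.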
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits."""
    bandit_start = time.clock()

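    # fix the random seed for repeatable runs; the chunk argument is
    # re-created from the config below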
    np.random.seed(100)
    chunk = db.Chunk(config)

    if not load:
        # load grasps from database
        sample_start = time.clock()

        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)

            # pad with gaussian grasps
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=target_num_grasps,
                    check_collisions=config['check_collisions'],
                    vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj,
                check_collisions=config['check_collisions'],
                vis=plot,
                grasp_gen_mult=6)
        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))

        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None

    else:
        grasps = load_grasps(obj, dest)
        grasps = grasps[:20]

        # grasps = chunk.load_grasps(obj.key)

    # load features for all grasps
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)

    features = feature_extractor.compute_all_features(grasps)
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # prune grasps whose features could not be computed
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps

    # compute distances for debugging
    distances = np.zeros([len(grasps), len(grasps)])
    for i, feature_i in enumerate(all_features):
        for j, feature_j in enumerate(all_features):
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)

    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

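    # nearest-neighbor structure and squared-exponential kernel, both defined
    # over the grasp feature vectors via phi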
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

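    # freshly sampled grasps: estimate true PFC by brute-force uniform
    # allocation; loaded grasps: reuse their stored quality estimates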
    if not load:
        # uniform allocation for true values
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)

        save_grasps(grasps, estimated_pfc, obj, dest)

        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']

        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success',
                  fontsize=font_size)
        plt.show()

        exit(0)
    else:
        estimated_pfc = np.array([g.quality for g in grasps])

    # debugging for examining bad features
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    #    grasp_i.close_fingers(obj, vis=True)
    #    grasp_j.close_fingers(obj, vis=True)

    grasp_i.surface_information(obj, config['window_width'],
                                config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'],
                                config['window_steps'])

    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])

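    # visualize the surface window features of the two grasps side by side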
    a = 0.1
    windows = [('wi1', wi1), ('wi2', wi2), ('wj1', wj1), ('wj2', wj2)]
    plt.figure()
    for idx, (title, window) in enumerate(windows):
        plt.subplot(2, 2, idx + 1)
        plt.imshow(window, cmap=plt.cm.Greys, interpolation='none')
        plt.colorbar()
        plt.clim(-a, a)  # fixing color range for visual comparisons
        plt.title(title)

    #    plt.show()
    #    IPython.embed()

    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []

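    # run both samplers for several independent trials; normalized rewards are
    # averaged across trials afterwards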
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result,
                                                    estimated_pfc)

        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)

    # get the bandit rewards
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # get correlations and plot
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()

    bad_ind = np.where(pfc_diff > 1.0 - k)

    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    #    plt.show()

    #    IPython.embed()

    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']

    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # plot the results
    plt.figure()
    plt.plot(ts_result.iters,
             avg_ts_rewards,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters,
             avg_ts_corr_rewards,
             c=u'r',
             linewidth=line_width,
             label='Thompson Sampling (Correlated)')

    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    IPython.embed()
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
#    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])

    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
#        rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' %(len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in
                   zip(pr2_grasps, pr2_grasp_qualities)], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward,
                                            ua_result, ts_result, ts_corr_result, obj_key=obj.key)
                                            """
    return None
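
A minimal usage sketch, mirroring the entry point in Example #1; the ec/db module aliases, the config path, and the output directory are assumptions taken from that excerpt rather than part of this function.

import logging
import os

logging.getLogger().setLevel(logging.INFO)

# ec and db are the project's experiment-config and database modules
# (aliases assumed from Example #1); label_correlated is defined above.
config = ec.ExperimentConfig('cfg/correlated.yaml')
chunk = db.Chunk(config)

# results are written to a per-chunk subdirectory of the output root
dest = os.path.join('out/', chunk.name)
if not os.path.exists(dest):
    os.makedirs(dest)

results = []
for obj in chunk:
    result = label_correlated(obj, chunk, dest, config)
    if result is not None:
        results.append(result)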