def getprobability(object, grasps):
    """Estimate the probability of force closure for the given grasps.

    Reads the mesh and SDF files named in ``object``, builds one
    force-closure random variable per grasp that has a feature vector, and
    runs uniform allocation over those candidates.

    Args:
        object: indexable record.  ``object[0]`` is a name from which the
            substring ``"_features.txt"`` is removed to form the object key,
            ``object[1]`` is the mesh file path and ``object[2]`` the SDF
            file path.  (The name shadows the builtin; it is kept so that
            existing callers are unaffected.)
        grasps: grasps to evaluate.

    Returns:
        ``BetaBernoulliModel.beta_mean`` of the alphas/betas of the last
        model snapshot produced by uniform allocation.

    Note:
        Reseeds NumPy's global RNG with 100 and always loads
        ``cfg/correlated.yaml``.
    """
    mesh_path = object[1]
    sdf_path = object[2]
    mesh = of.ObjFile(mesh_path).read()
    sdf = sf.SdfFile(sdf_path).read()
    graspable = go.GraspableObject3D(
        sdf,
        mesh=mesh,
        key=object[0].replace("_features.txt", ""),
        model_name=mesh_path)

    config = ec.ExperimentConfig("cfg/correlated.yaml")
    np.random.seed(100)

    # Bandit settings.  Only the brute-force iteration count and the
    # snapshot rate are consumed below; the other values are currently unused.
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [tc.MaxIterTerminationCondition(max_iter)]

    # Random variables shared by every candidate.
    pose_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])

    # Feature vectors, in the same order as ``grasps``.
    extractor = ff.GraspableFeatureExtractor(graspable, config)
    feature_list = extractor.compute_all_features(grasps)

    candidates = []
    for grasp, grasp_features in zip(grasps, feature_list):
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        closure_rv = pfc.ForceClosureRV(grasp_rv, pose_rv, friction_rv, config)
        if grasp_features is None:
            # No features for this grasp: it does not become a candidate.
            continue
        closure_rv.set_features(grasp_features)
        candidates.append(closure_rv)

    def features_of(rv):
        return rv.features

    # Nearest-neighbour structure and kernel are built but not used by the
    # uniform-allocation run below.
    nn = kernels.KDTree(phi=features_of)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=features_of)
    objective = objectives.RandomBinaryObjective()

    # Uniform allocation provides the reference ("true") values.
    allocator = das.UniformAllocationMean(objective, candidates)
    result = allocator.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    last_model = result.models[-1]
    return models.BetaBernoulliModel.beta_mean(last_model.alphas,
                                               last_model.betas)
def run_ua_on(obj, config):
    """Sample grasps on ``obj`` and evaluate them with uniform allocation.

    Args:
        obj: graspable object passed to the grasp samplers and to
            ``pfc.GraspableObjectGaussianPose``.
        config: mapping-style experiment configuration.  Keys read here:
            ``grasp_sampler``, ``check_collisions``, ``min_num_grasps``
            (antipodal sampling only), ``friction_coef``, ``sigma_mu`` and
            ``bandit_brute_force_iter``.

    Returns:
        Whatever ``UniformAllocationMean.solve`` returns for the run.
    """
    sample_start = time.clock()  # recorded, but never reported

    if config['grasp_sampler'] != 'antipodal':
        logging.info('Using Gaussian grasp sampling')
        grasps = gs.GaussianGraspSampler(config).generate_grasps(
            obj, check_collisions=config['check_collisions'])
    else:
        logging.info('Using antipodal grasp sampling')
        grasps = ags.AntipodalGraspSampler(config).generate_grasps(
            obj, check_collisions=config['check_collisions'])

        # Top up with Gaussian-sampled grasps when too few were found.
        num_found = len(grasps)
        shortfall = config['min_num_grasps'] - num_found
        if shortfall > 0:
            padding = gs.GaussianGraspSampler(config).generate_grasps(
                obj,
                target_num_grasps=shortfall,
                check_collisions=config['check_collisions'])
            grasps.extend(padding)

    # One force-closure random variable per sampled grasp.
    pose_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])
    candidates = []
    for index, grasp in enumerate(grasps):
        # Every grasp is appended, so the index equals len(candidates).
        logging.info('Adding grasp %d candidate' % (index))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        candidates.append(
            pfc.ForceClosureRV(grasp_rv, pose_rv, friction_rv, config))
    logging.info('%d candidates', len(candidates))

    # The iteration budget scales with the number of candidates; taking a
    # snapshot only at that budget yields a single snapshot at the end.
    total_iters = config['bandit_brute_force_iter'] * len(candidates)
    snapshot_rate = total_iters

    objective = objectives.RandomBinaryObjective()
    allocator = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    solve_start = time.clock()
    result = allocator.solve(
        termination_condition=tc.MaxIterTerminationCondition(total_iters),
        snapshot_rate=snapshot_rate)
    solve_duration = time.clock() - solve_start
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (total_iters, solve_duration))
    return result
def _module_level_config():
    """Return the module-level ``config`` (NameError if none is defined)."""
    return config


def load_candidate_grasps(obj, chunk, config=None):
    """Load stored grasps and features for ``obj`` and wrap them as candidates.

    Args:
        obj: graspable object; ``obj.key`` selects the grasps in ``chunk``.
        chunk: database chunk providing ``load_grasps(key)`` and ``name``.
        config: experiment configuration.  Optional for backward
            compatibility: when omitted, the module-level ``config`` is used,
            which is what this function previously relied on implicitly.

    Returns:
        A list of ``pfc.ForceClosureRV`` candidates, one per grasp whose
        features could be loaded, or ``None`` when no grasps are stored for
        the object.

    Raises:
        NameError: if ``config`` is omitted and no module-level ``config``
            exists (only reached when there are grasps to process).
    """
    # Load grasps from the database.
    load_start = time.clock()
    grasps = chunk.load_grasps(obj.key)
    load_duration = time.clock() - load_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (load_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # Resolve the configuration lazily so that the early return above keeps
    # working even when no configuration is available.
    if config is None:
        config = _module_level_config()

    # Load features for all grasps (returned in the same order as grasps).
    feature_start = time.clock()
    feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    all_features = feature_loader.load_all_features(grasps)
    feature_duration = time.clock() - feature_start
    logging.info('Loaded %d features' % (len(all_features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # Random variables shared by every candidate.
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    out_rate = 50  # log progress every 50 grasps
    for k, (grasp, features) in enumerate(zip(grasps, all_features)):
        if k % out_rate == 0:
            logging.info('Adding grasp %d' % (k))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            # Grasps without features are reported and left out.
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)
    return candidates
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label sampled grasps with force-closure probability and compare bandits.

    Samples grasps on ``obj``, estimates their probability of force closure
    with a brute-force uniform-allocation run, saves the labelled grasps via
    ``save_grasps`` and then runs ``num_trials`` rounds of uniform
    allocation, Thompson sampling and correlated Thompson sampling.

    Args:
        obj: graspable object to label.
        chunk: not used by this implementation.
        dest: destination handed to ``save_grasps``.
        config: mapping-style experiment configuration.
        plot: forwarded as ``vis`` to the Gaussian grasp sampler.
        load: not used by this implementation.

    Returns:
        ``None`` when no grasps could be sampled, otherwise a
        ``BanditCorrelatedExperimentResult`` holding the per-method rewards
        averaged over trials, the estimated pfc values, the iteration axis of
        the last uniform-allocation run and the kernel matrix.
    """
    bandit_start = time.clock()  # recorded, but never reported
    #np.random.seed(100)

    # --- sample grasps ---------------------------------------------------
    sample_start = time.clock()
    if config['grasp_sampler'] != 'antipodal':
        logging.info('Using Gaussian grasp sampling')
        grasps = gs.GaussianGraspSampler(config).generate_grasps(
            obj,
            check_collisions=config['check_collisions'],
            vis=plot,
            grasp_gen_mult=6)
    else:
        logging.info('Using antipodal grasp sampling')
        grasps = ags.AntipodalGraspSampler(config).generate_grasps(
            obj, check_collisions=config['check_collisions'], vis=False)

        # Top up with Gaussian-sampled grasps when too few were found.
        num_found = len(grasps)
        shortfall = config['min_num_grasps'] - num_found
        if shortfall > 0:
            padding = gs.GaussianGraspSampler(config).generate_grasps(
                obj,
                target_num_grasps=shortfall,
                check_collisions=config['check_collisions'],
                vis=plot)
            grasps.extend(padding)
    sample_duration = time.clock() - sample_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (sample_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # --- compute features for all grasps ---------------------------------
    feature_start = time.clock()
    extractor = ff.GraspableFeatureExtractor(obj, config)
    feature_list = extractor.compute_all_features(grasps)
    feature_duration = time.clock() - feature_start
    logging.info('Loaded %d features' % (len(feature_list)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # --- bandit parameters (``confidence`` is read but unused) -----------
    num_trials = config['num_trials']
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    brute_snapshot_rate = config['bandit_brute_force_snapshot_rate']
    tc_list = [tc.MaxIterTerminationCondition(max_iter)]

    # --- random variables ------------------------------------------------
    pose_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])

    candidates = []
    for grasp, grasp_features in zip(grasps, feature_list):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        closure_rv = pfc.ForceClosureRV(grasp_rv, pose_rv, friction_rv, config)
        if grasp_features is None:
            logging.info('Could not compute features for grasp.')
            continue
        closure_rv.set_features(grasp_features)
        candidates.append(closure_rv)

    # Feature transform used by the nearest-neighbour structure and kernel.
    def features_of(rv):
        return rv.features

    nn = kernels.KDTree(phi=features_of)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=features_of)
    objective = objectives.RandomBinaryObjective()

    # --- brute-force uniform allocation for the reference pfc ------------
    brute_allocator = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    brute_result = brute_allocator.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=brute_snapshot_rate)
    final_model = brute_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                                        final_model.betas)
    save_grasps(grasps, estimated_pfc, obj, dest,
                num_successes=final_model.alphas,
                num_failures=final_model.betas)

    # --- compare the bandit algorithms over several trials ---------------
    ua_rewards, ts_rewards, ts_corr_rewards = [], [], []
    for trial in range(num_trials):
        logging.info('Trial %d' % (trial))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Correlated Thompson sampling
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Rewards relative to the reference pfc, one entry per trial.
        ua_reward = reward_vs_iters(ua_result, estimated_pfc)
        ts_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_reward = reward_vs_iters(ts_corr_result, estimated_pfc)
        ua_rewards.append(ua_reward)
        ts_rewards.append(ts_reward)
        ts_corr_rewards.append(ts_corr_reward)

    # Average over trials (axis 0 indexes the trial).
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)

    kernel_matrix = kernel.matrix(candidates)

    # ``ua_result`` is the result of the last trial.
    return BanditCorrelatedExperimentResult(avg_ua_rewards,
                                            avg_ts_rewards,
                                            avg_ts_corr_rewards,
                                            estimated_pfc,
                                            ua_result.iters,
                                            kernel_matrix,
                                            obj_key=obj.key)
def test_window_correlation(width, num_steps, vis=True):
    """Print estimated pfc and the kernel matrix for four fixed test grasps.

    Builds four parallel-jaw grasps on the ``Co_clean`` test object that
    differ only in the z coordinate of their center, extracts window features
    using the given window settings, estimates each grasp's probability of
    force closure with 100 iterations of uniform allocation and prints the
    estimates together with the squared-exponential kernel matrix.

    Args:
        width: value stored in ``config['window_width']``.
        num_steps: value stored in ``config['window_steps']``.
        vis: when true, plot the friction cones of every grasp and show all
            figures at the end.

    Note:
        Reseeds NumPy's global RNG with 100.  Relies on the module-level
        names ``np``, ``plt``, ``logging`` and ``g``.
    """
    # ``import scipy`` alone does not guarantee that the ``stats`` submodule
    # is loaded; import it explicitly (this still binds the name ``scipy``).
    import scipy.stats
    import sdf_file
    import obj_file
    import discrete_adaptive_samplers as das
    import experiment_config as ec
    import feature_functions as ff
    import graspable_object as go  # weird Python issues
    import kernels
    import models
    import objectives
    import pfc
    import termination_conditions as tc

    np.random.seed(100)

    mesh_file_name = 'data/test/meshes/Co_clean.obj'
    sdf_3d_file_name = 'data/test/sdf/Co_clean.sdf'

    config = ec.ExperimentConfig('cfg/correlated.yaml')
    config['window_width'] = width
    config['window_steps'] = num_steps
    brute_force_iter = 100
    snapshot_rate = config['bandit_snapshot_rate']

    sdf = sdf_file.SdfFile(sdf_3d_file_name).read()
    mesh = obj_file.ObjFile(mesh_file_name).read()
    graspable = go.GraspableObject3D(sdf, mesh)

    grasp_axis = np.array([0, 1, 0])
    grasp_width = 0.1

    grasps = []
    for z in [-0.030, -0.035, -0.040, -0.045]:
        grasp_center = np.array([0, 0, z])
        # Reach the class through ``g`` for both the constructor and the
        # configuration helper, so no bare ``ParallelJawPtGrasp3D`` name has
        # to be in scope.
        grasp = g.ParallelJawPtGrasp3D(
            g.ParallelJawPtGrasp3D.configuration_from_params(
                grasp_center, grasp_axis, grasp_width))
        grasps.append(grasp)

    graspable_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(graspable, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)

        if vis:
            # One figure per grasp showing the friction cones at both contacts.
            _, (c1, c2) = grasp.close_fingers(graspable)
            plt.figure()
            c1_proxy = c1.plot_friction_cone(color='m')
            c2_proxy = c2.plot_friction_cone(color='y')
            plt.legend([c1_proxy, c2_proxy], ['Cone 1', 'Cone 2'])
            plt.title('Grasp %d' % (len(candidates)))

    objective = objectives.RandomBinaryObjective()

    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    final_model = ua_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                                        final_model.betas)

    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('true pfc')
    print(estimated_pfc)

    def phi(rv):
        return rv.features

    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)

    print('kernel matrix')
    print(kernel.matrix(candidates))

    if vis:
        plt.show()
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Debug variant: compare Thompson sampling with and without correlations.

    With ``load`` false, grasps are sampled, labelled by brute-force uniform
    allocation, saved via ``save_grasps``, a histogram is shown and the
    process then exits through ``exit(0)``.  With ``load`` true (the
    default), at most the first 20 previously saved grasps are loaded from
    ``dest`` and their stored ``quality`` is used as the reference pfc; the
    function then plots feature windows, runs ``num_trials`` rounds of plain
    and correlated Thompson sampling, plots the results and finally opens an
    interactive IPython shell.

    Args:
        obj: graspable object to label.
        chunk: ignored; immediately replaced by ``db.Chunk(config)``.
        dest: location passed to ``load_grasps`` / ``save_grasps``.
        config: mapping-style experiment configuration.
        plot: forwarded as ``vis`` to the grasp samplers.
        load: selects between loading saved grasps and sampling new ones.

    Returns:
        Always ``None``.

    Note:
        Reseeds NumPy's global RNG with 100.  Several locals below are never
        read by the code itself; they are available for inspection from the
        ``IPython.embed()`` shell at the end, which exposes this function's
        local variables.
    """
    bandit_start = time.clock()
    np.random.seed(100)
    # NOTE: this rebinds the ``chunk`` parameter, so the caller's value is
    # never used.
    chunk = db.Chunk(config)

    if not load:
        # load grasps from database
        # (this branch actually samples fresh grasps rather than loading)
        sample_start = time.clock()

        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)

            # pad with gaussian grasps
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj, target_num_grasps=target_num_grasps, check_collisions=config['check_collisions'], vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot, grasp_gen_mult=6)

        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))

        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None

    else:
        # Reuse previously saved grasps, limited to the first 20.
        grasps = load_grasps(obj, dest)
        grasps = grasps[:20]
        # grasps = chunk.load_grasps(obj.key)

    # load features for all grasps
    # (features are recomputed here, not read back from disk)
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    features = feature_extractor.compute_all_features(grasps)
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # prune crappy grasps
    # (drop every grasp whose feature entry is None, keeping both lists aligned)
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps

    # compute distances for debugging
    # (pairwise norm of feature differences; not read again by this function)
    distances = np.zeros([len(grasps), len(grasps)])
    i = 0
    for feature_i in all_features:
        j = 0
        for feature_j in all_features:
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)
            j += 1
        i += 1

    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    # NOTE: the loop variable ``features`` rebinds the name that held the
    # full feature list above.  Entries cannot be None here because they
    # were pruned, so every grasp becomes a candidate.
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    if not load:
        # uniform allocation for true values
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)

        save_grasps(grasps, estimated_pfc, obj, dest)

        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']

        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success', fontsize=font_size)
        plt.show()

        # The labelling run stops here: nothing below executes when
        # ``load`` is false.
        exit(0)
    else:
        # Saved grasps carry their previously estimated quality.
        estimated_pfc = np.array([g.quality for g in grasps])

    # debugging for examining bad features
    # (indexes 0 and 1 are hard-coded, so at least two grasps must remain)
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    # grasp_i.close_fingers(obj, vis=True)
    # grasp_j.close_fingers(obj, vis=True)

    # Return values are discarded.
    grasp_i.surface_information(obj, config['window_width'], config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'], config['window_steps'])

    # Reshape four window feature vectors (two per grasp) into w-by-w images.
    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])

    a = 0.1  # half-width of the shared colour range
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(wi1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi1')
    plt.subplot(2, 2, 2)
    plt.imshow(wi2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi2')
    plt.subplot(2, 2, 3)
    plt.imshow(wj1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj1')
    plt.subplot(2, 2, 4)
    plt.imshow(wj2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj2')
    # plt.show()
    # IPython.embed()

    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel, tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)

    # get the bandit rewards
    # (averaged over trials; axis 0 indexes the trial)
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # get correlations and plot
    # (kernel value against absolute pfc difference for every candidate pair)
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()
    # Pairs whose pfc difference exceeds 1 - kernel value; not read again here.
    bad_ind = np.where(pfc_diff > 1.0 - k)

    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    # plt.show()
    # IPython.embed()

    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']

    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success', fontsize=font_size)

    # plot the results
    # (``ts_result`` / ``ts_corr_result`` are those of the last trial, so
    # ``num_trials`` must be at least 1)
    plt.figure()
    plt.plot(ts_result.iters, avg_ts_rewards, c=u'g', linewidth=line_width, label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters, avg_ts_corr_rewards, c=u'r', linewidth=line_width, label='Thompson Sampling (Correlated)')

    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    # Open an interactive shell for inspecting the locals computed above.
    IPython.embed()
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    # grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])

    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        # rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' %(len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in
                   zip(pr2_grasps, pr2_grasp_qualities)], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward,
                                            ua_result, ts_result, ts_corr_result, obj_key=obj.key)
    """
    return None
def label_correlated(obj, chunk, dest, config, plot=False):
    """Run the bandit comparison on grasps stored in the database.

    Loads the grasps and features stored for ``obj`` in ``chunk``, runs
    uniform allocation, Thompson sampling and correlated Thompson sampling
    once each, writes transformed versions of the grasps selected by plain
    Thompson sampling to ``<dest>/<obj.key>.json`` and returns the rewards
    of the three methods.

    Args:
        obj: graspable object; ``obj.key`` names the stored grasps and the
            output file, ``obj.tf`` is passed to ``grasp.transform``.
        chunk: database chunk providing ``load_grasps(key)`` and ``name``.
        dest: directory that receives the JSON file.
        config: mapping-style experiment configuration.
        plot: not used by this implementation.

    Returns:
        ``None`` when the chunk holds no grasps for the object, otherwise a
        ``BanditCorrelatedExperimentResult``.

    Note:
        Reseeds NumPy's global RNG with 100.
    """
    bandit_start = time.clock()
    np.random.seed(100)

    # --- load grasps from the database -----------------------------------
    load_start = time.clock()
    grasps = chunk.load_grasps(obj.key)
    load_duration = time.clock() - load_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (load_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # --- load features (same order as the grasps) ------------------------
    feature_start = time.clock()
    loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    feature_list = loader.load_all_features(grasps)
    feature_duration = time.clock() - feature_start
    logging.info('Loaded %d features' % (len(feature_list)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # --- bandit parameters -------------------------------------------------
    # ``brute_force_iter`` and ``confidence`` are read but not used below.
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [tc.MaxIterTerminationCondition(max_iter)]

    # --- candidates -----------------------------------------------------------
    pose_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])

    candidates = []
    for grasp, grasp_features in zip(grasps, feature_list):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        closure_rv = pfc.ForceClosureRV(grasp_rv, pose_rv, friction_rv, config)
        if grasp_features is None:
            logging.info('Could not compute features for grasp.')
            continue
        closure_rv.set_features(grasp_features)
        candidates.append(closure_rv)

    # --- kernel and nearest-neighbour structure ----------------------------
    def features_of(rv):
        return rv.features

    nn = kernels.KDTree(phi=features_of)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=features_of)

    if config['grasp_symmetry']:
        # Replace both structures with variants that also consider the
        # swapped feature vector of each candidate.
        def swapped_features_of(rv):
            return rv.swapped_features

        nn = kernels.SymmetricKDTree(phi=features_of,
                                     alternate_phi=swapped_features_of)
        kernel = kernels.SymmetricSquaredExponentialKernel(
            sigma=config['kernel_sigma'],
            l=config['kernel_l'],
            phi=features_of,
            alternate_phi=swapped_features_of)
    objective = objectives.RandomBinaryObjective()

    # Reference values come from the qualities stored with the grasps.
    estimated_pfc = np.array([cand.grasp.quality for cand in candidates])

    # --- run the three samplers ----------------------------------------------
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation.')
    ua_result = ua.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    ts = das.ThompsonSampling(objective, candidates)
    logging.info('Running Thompson sampling.')
    ts_result = ts.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    ts_corr = das.CorrelatedThompsonSampling(
        objective, candidates, nn, kernel,
        tolerance=config['kernel_tolerance'])
    logging.info('Running correlated Thompson sampling.')
    ts_corr_result = ts_corr.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # Grasps selected by plain Thompson sampling, with predicted means.
    best_grasps = [candidates[idx].grasp for idx in ts_result.best_candidates]
    best_means = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' % (bandit_stop - bandit_start))

    # --- transformed versions of the selected grasps -----------------------
    theta_res = config['grasp_theta_res'] * np.pi
    # grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    # ``delay`` is only consumed by the disabled collision check below.
    delay = config['vis_delay'] if config['vis_grasps'] else 0

    pr2_grasps = []
    pr2_grasp_qualities = []
    for grasp, quality in zip(best_grasps, best_means):
        variants = grasp.transform(obj.tf, theta_res)
        # variants = grasp_checker.prune_grasps_in_collision(obj, variants, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(variants)
        # Every variant inherits the quality of the grasp it came from.
        pr2_grasp_qualities.extend([quality] * len(variants))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    serialized = None
    with open(grasp_filename, 'w') as out_file:
        serialized = [
            variant.to_json(quality=quality)
            for variant, quality in zip(pr2_grasps, pr2_grasp_qualities)
        ]
        jsons.dump(serialized, out_file)

    ua_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_reward,
                                            ts_reward,
                                            ts_corr_reward,
                                            estimated_pfc,
                                            ua_result.iters,
                                            kernel.matrix(candidates),
                                            obj_key=obj.key)
def extract_features(obj, dest, feature_dest, config):
    """Sample grasps, label them by brute force and write grasps and features.

    Grasps are sampled on ``obj``, features are computed for them, and every
    grasp with features is labelled with its probability of force closure
    using uniform allocation.  The labelled grasps go to
    ``<dest>/<obj.key>.json`` and the features to
    ``<feature_dest>/<obj.key>.json``.

    Args:
        obj: graspable object; ``obj.key`` names the output files.
        dest: directory for the grasp JSON file.  Window paths in the
            feature JSON are made relative to the parent of this directory.
        feature_dest: directory for the feature JSON file; also passed to
            each feature's ``to_json``.
        config: mapping-style experiment configuration.

    Returns:
        ``None`` in all cases (returns early when no grasps were sampled).

    Note:
        If the number of candidate grasps and pfc estimates disagree, a
        warning is logged and an interactive IPython shell is opened before
        the files are written.
    """
    # --- sample grasps -----------------------------------------------------
    sample_start = time.clock()
    if config['grasp_sampler'] != 'antipodal':
        logging.info('Using Gaussian grasp sampling')
        grasps = gs.GaussianGraspSampler(config).generate_grasps(
            obj, check_collisions=config['check_collisions'])
    else:
        logging.info('Using antipodal grasp sampling')
        grasps = ags.AntipodalGraspSampler(config).generate_grasps(
            obj, check_collisions=config['check_collisions'])

        # Top up with Gaussian-sampled grasps when too few were found.
        num_found = len(grasps)
        shortfall = config['min_num_grasps'] - num_found
        if shortfall > 0:
            padding = gs.GaussianGraspSampler(config).generate_grasps(
                obj,
                target_num_grasps=shortfall,
                check_collisions=config['check_collisions'])
            grasps.extend(padding)
    sample_duration = time.clock() - sample_start
    logging.info('Grasp candidate generation took %f sec' % (sample_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return

    # --- compute all features --------------------------------------------------
    feature_start = time.clock()
    extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = extractor.compute_all_features(grasps)
    feature_duration = time.clock() - feature_start
    logging.info('Feature extraction took %f sec' % (feature_duration))

    # --- build pfc candidates (grasps without features are skipped) --------
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])
    candidates = []
    logging.info('%d grasps, %d valid features', len(grasps),
                 len(all_features) - all_features.count(None))
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d candidate' % (len(candidates)))
        if features is not None:
            grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
            pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)
        else:
            logging.info('No features computed.')
    logging.info('%d candidates', len(candidates))

    # --- brute force with uniform allocation ---------------------------------
    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']

    def phi(rv):  # defined for parity with the bandit code; unused here
        return rv.features

    objective = objectives.RandomBinaryObjective()
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    bandit_start = time.clock()
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    bandit_duration = time.clock() - bandit_start
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (brute_force_iter, bandit_duration))

    cand_grasps = [cand.grasp for cand in candidates]
    cand_features = [cand.features_ for cand in candidates]
    final_model = ua_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        final_model.alphas, final_model.betas)

    if len(cand_grasps) != len(estimated_pfc):
        logging.warning(
            'Number of grasps does not match estimated pfc results.')
        IPython.embed()

    # --- write the labelled grasps ---------------------------------------------
    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as grasp_file:
        labelled = zip(cand_grasps, estimated_pfc,
                       final_model.alphas, final_model.betas)
        jsons.dump([
            grasp.to_json(quality=quality,
                          num_successes=successes,
                          num_failures=failures)
            for grasp, quality, successes, failures in labelled
        ], grasp_file)

    # --- write the features ------------------------------------------------------
    # HACK to make paths relative: rewrite every path stored under 'w1' and
    # 'w2' so that it is relative to the parent directory of ``dest``.
    features_as_json = [feat.to_json(feature_dest) for feat in cand_features]
    output_dest = os.path.split(dest)[0]
    for entry in features_as_json:
        payload = list(entry.values())[0]
        for window_name in ('w1', 'w2'):
            window_paths = payload[window_name]
            for name, path in window_paths.items():
                window_paths[name] = os.path.relpath(path, output_dest)

    feature_filename = os.path.join(feature_dest, obj.key + '.json')
    with open(feature_filename, 'w') as feature_file:
        jsons.dump(features_as_json, feature_file)
def label_pfc(obj, dataset, output_dir, config):
    """Label an object with grasps according to probability of force closure.

    Antipodal grasps are sampled and partitioned with
    ``pfc.space_partition_grasps``; Thompson sampling then picks the best
    grasps of every partition.  Transformed, collision-pruned versions of
    those grasps are written to ``<output_dir>/<obj.key>.json`` together with
    the predicted quality of the grasp they were derived from.

    Args:
        obj: graspable object; ``obj.tf`` is passed to ``grasp.transform``
            and ``obj.key`` names the output file.
        dataset: not used by this implementation.
        output_dir: directory that receives the JSON file.
        config: mapping-style experiment configuration.

    Returns:
        ``None``; the result is the file written to ``output_dir``.
    """
    # --- sample initial antipodal grasps -----------------------------------
    start = time.clock()
    sampler = ags.AntipodalGraspSampler(config)

    sampling_start = time.clock()
    # The two thresholds returned alongside the grasps are not used here.
    grasps, alpha_thresh, rho_thresh = sampler.generate_grasps(obj, vis=False)
    sampling_duration = time.clock() - sampling_start
    logging.info('Antipodal grasp candidate generation took %f sec' %
                 (sampling_duration))

    grasp_partitions = pfc.space_partition_grasps(grasps, config)

    # --- bandit parameters -------------------------------------------------------
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        tc.ConfidenceTerminationCondition(confidence),
    ]

    # --- run bandits on each partition ----------------------------------------
    object_grasps = []
    grasp_qualities = []
    for part_index, partition in enumerate(grasp_partitions):
        logging.info('Finding highest quality grasp in partition %d' %
                     (part_index))

        # Fresh random variables for every partition.
        pose_rv = pfc.GraspableObjectGaussianPose(obj, config)
        friction_rv = scipy.stats.norm(config['friction_coef'],
                                       config['sigma_mu'])
        candidates = [
            pfc.ForceClosureRV(pfc.ParallelJawGraspGaussian(grasp, config),
                               pose_rv, friction_rv, config)
            for grasp in partition
        ]

        objective = objectives.RandomBinaryObjective()
        sampler_ts = das.ThompsonSampling(objective, candidates)
        ts_result = sampler_ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        object_grasps.extend([best.grasp for best in ts_result.best_candidates])
        grasp_qualities.extend(list(ts_result.best_pred_means))

    stop = time.clock()
    logging.info('Took %d sec' % (stop - start))

    # --- transformed, collision-pruned versions of the grasps --------------
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    delay = config['vis_delay'] if config['vis_grasps'] else 0

    for grasp_index, grasp in enumerate(object_grasps):
        # Same console output as the statement form ``print 'Grasp', i``.
        print('Grasp %d' % grasp_index)
        variants = grasp.transform(obj.tf, theta_res)
        variants = grasp_checker.prune_grasps_in_collision(
            obj, variants, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(variants)
        # Every surviving variant inherits the quality of its source grasp.
        pr2_grasp_qualities.extend(
            [grasp_qualities[grasp_index]] * len(variants))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    # save grasps locally :( Due to problems with sudo
    grasp_filename = os.path.join(output_dir, obj.key + '.json')
    with open(grasp_filename, 'w') as out_file:
        records = []
        for idx in range(len(pr2_grasps)):
            records.append(
                pr2_grasps[idx].to_json(quality=pr2_grasp_qualities[idx]))
        jsons.dump(records, out_file)