def __init__(self, db, config):
    """Set up the feature database, kernels and neighbor-search settings.

    :param db: stored unchanged on ``self.db``.
    :param config: configuration indexed by string keys; it is passed to
        ``FeatureDatabase`` and read for ``'kernel_sigma'``, ``'kernel_l'``,
        ``'prior_neighbor_weight'``, ``'prior_neighbor_distance'``,
        ``'prior_num_neighbors'``, ``'kernel_tolerance'`` and
        ``'prior_kernel_tolerance'``.
    """
    self.feature_db = FeatureDatabase(config)
    self.db = db
    self.config = config

    # Kernel over grasps, parameterized directly from the config.
    self.grasp_kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'])

    # Kernel over neighbors: the length scale is the reciprocal of the
    # configured weight. The float numerator keeps Python 2 from truncating
    # the reciprocal to 0 when the weight is configured as an integer.
    self.neighbor_kernel = kernels.SquaredExponentialKernel(
        sigma=1.0, l=(1.0 / config['prior_neighbor_weight']))

    self.neighbor_distance = config['prior_neighbor_distance']
    self.num_neighbors = config['prior_num_neighbors']
    self.grasp_kernel_tolerance = config['kernel_tolerance']
    self.prior_kernel_tolerance = config['prior_kernel_tolerance']
def sample_gp(x, gp=None, alpha=np.empty(0), kerneltype='se', size=1):
    """
    Draw samples at the inputs ``x`` from a GP or from a kernel built on the
    fly.

    :param array x: input array where GP is to be sampled.
    :param gp: a `~gp.core.GaussianProcess` class instance. When it is one,
        it is used directly and ``alpha``/``kerneltype`` are ignored.
        Otherwise a kernel instance is constructed from ``alpha`` and
        ``kerneltype``.
    :param array-like alpha: hyperparameters handed to the kernel
        constructor.
    :param string kerneltype: The kernel type. Options are:
        'se' for a squared exponential kernel,
        'ge' for a generalised exponential kernel,
        'qper' for a quasiperiodic kernel.
    :param int size: size of the sample.
    :returns: whatever the ``sample`` method of the GP / kernel returns.
    :raises NameError: if ``gp`` is not a GaussianProcess and ``kerneltype``
        is not one of the options above.
    """
    # A ready-made GaussianProcess instance takes precedence.
    if isinstance(gp, GaussianProcess):
        gp.set_test_input(x)
        return gp.sample(size)

    # Otherwise resolve the kernel class by name; the attribute lookup is
    # deferred so that only the requested class is touched.
    known_kernels = (
        ('se', 'SquaredExponentialKernel'),
        ('ge', 'GeneralisedExponentialKernel'),
        ('qper', 'QuasiPeriodicKernel'),
    )
    for type_name, class_name in known_kernels:
        if kerneltype == type_name:
            kernel_cls = getattr(kernels, class_name)
            return kernel_cls(alpha).sample(x, size)

    raise NameError('Kerneltype not recognised.')
def kernel(self, w):
    """Build a squared-exponential kernel whose feature map multiplies by ``w``.

    :param w: weights; each row handed to the kernel's feature transform is
        multiplied by this value.
    :returns: a ``kernels.SquaredExponentialKernel`` configured with
        ``self.config_['kernel_sigma']`` and ``self.config_['kernel_l']``.
    """
    def weighted_row(row):
        # Feature transform passed to the kernel as ``phi``.
        return w * row

    sigma = self.config_['kernel_sigma']
    length_scale = self.config_['kernel_l']
    return kernels.SquaredExponentialKernel(
        sigma=sigma, l=length_scale, phi=weighted_row)
def getprobability(object, grasps, config_name="cfg/correlated.yaml"):
    """Estimate the probability of force closure for grasps on one object.

    The mesh and SDF are read from disk, a force-closure random variable is
    built for each grasp, and uniform allocation is run for
    ``config['bandit_brute_force_iter']`` iterations to estimate the success
    probability of each candidate.

    :param object: indexable record; ``object[0]`` is a name from which the
        ``"_features.txt"`` suffix is stripped to form the object key,
        ``object[1]`` is the mesh file name and ``object[2]`` the SDF file
        name. (The parameter name shadows the builtin; it is kept so that
        keyword callers keep working.)
    :param grasps: grasps to evaluate on the object.
    :param str config_name: path of the experiment configuration file;
        defaults to the previously hard-coded ``"cfg/correlated.yaml"``.
    :returns: beta-mean estimates computed from the alphas and betas of the
        final model of the uniform-allocation run.
    """
    obj_name = object[1]
    sdf_name = object[2]
    obj_mesh = of.ObjFile(obj_name).read()
    sdf_ = sf.SdfFile(sdf_name).read()
    obj = go.GraspableObject3D(sdf_, mesh=obj_mesh,
                               key=object[0].replace("_features.txt", ""),
                               model_name=obj_name)

    config = ec.ExperimentConfig(config_name)
    # Fixed seed, as before, so repeated calls sample identically.
    np.random.seed(100)

    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']

    # random variables shared by all grasps
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        # NOTE(review): grasps without features are dropped here, so the
        # result may be shorter than ``grasps`` -- confirm this matches the
        # intended layout of the original loop.
        if features is not None:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua = das.UniformAllocationMean(objective, candidates)
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)

    final_model = ua_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        final_model.alphas, final_model.betas)
    return estimated_pfc
def plot_kernels_for_key(obj, chunk, config, priors_dataset=None,
                         nearest_features_name=None):
    """Scatter-plot kernel values against PFC differences for an object.

    Pairwise kernel values between the object's own candidate grasps are
    plotted against the pairwise differences of their stored qualities,
    followed by one scatter series per neighbor returned by the prior
    engine. Object-to-neighbor distances are printed.

    :param obj: object whose candidate grasps are loaded; ``obj.key`` is
        used for printing and for the legend.
    :param chunk: dataset the candidates are loaded from; also used as the
        priors dataset when ``priors_dataset`` is None.
    :param config: configuration read for ``'kernel_sigma'`` and
        ``'kernel_l'`` and passed to the prior engine.
    :param priors_dataset: dataset for the prior computation engine.
    :param nearest_features_name: forwarded to ``compute_grasp_kernels``
        only when it is not None.
    :returns: None. The figure is created but not shown here.
    """
    candidates = load_candidate_grasps(obj, chunk)
    # Same guard as the label/eval functions: nothing to plot without
    # candidates.
    if candidates is None:
        return None

    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    # Only pass the keyword when a name was given, so the engine's own
    # default applies otherwise.
    if nearest_features_name is None:
        kernel_info = prior_engine.compute_grasp_kernels(obj, candidates)
    else:
        kernel_info = prior_engine.compute_grasp_kernels(
            obj, candidates, nearest_features_name=nearest_features_name)
    (neighbor_keys, all_neighbor_kernels, all_neighbor_pfc_diffs,
     all_distances) = kernel_info

    for neighbor_key, object_distance in zip(neighbor_keys, all_distances):
        # Parenthesized single-argument print behaves the same on
        # Python 2 and 3.
        print('%s and %s: %.5f' % (obj.key, neighbor_key, object_distance))

    # feature transform
    def phi(rv):
        return rv.features

    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)

    # Pairwise kernel values and pairwise quality differences, flattened so
    # that entry i of one vector corresponds to entry i of the other.
    estimated_pfc = np.array([c.grasp.quality for c in candidates])
    k_vec = kernel.matrix(candidates).ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_vec = ssd.squareform(ssd.pdist(pfc_arr)).ravel()

    # Legend labels are truncated to 15 characters.
    labels = [obj.key[:15]] + [key[:15] for key in neighbor_keys]

    plt.figure()
    colors = plt.get_cmap('hsv')(
        np.linspace(0.5, 1.0, len(all_neighbor_pfc_diffs)))
    scatter_objs = [plt.scatter(k_vec, pfc_vec, c='#eeeeff')]
    for i, (neighbor_pfc_diffs, neighbor_kernels) in enumerate(
            zip(all_neighbor_pfc_diffs, all_neighbor_kernels)):
        scatter_objs.append(
            plt.scatter(neighbor_kernels, neighbor_pfc_diffs, c=colors[i]))

    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    plt.legend(scatter_objs, labels)
def test_correlated_thompson_sampling(num_candidates=NUM_CANDIDATES, sig=1.0,
                                      eps=0.5):
    """Run correlated Thompson sampling on Bernoulli arms and check the winner.

    Candidates are Bernoulli variables with means evenly spaced on [0, 1].
    The test asserts that exactly one best candidate is reported and that
    its mean matches the largest true mean, then plots the result.

    :param int num_candidates: number of Bernoulli candidates.
    :param float sig: ``sigma`` of the squared-exponential kernel.
    :param float eps: ``tolerance`` passed to the sampler.
    :returns: the result object returned by the sampler's ``solve``.
    """
    # get candidates
    actual_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = [BernoulliRV(m) for m in actual_means]

    # get true maximum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)[0]

    # constructing nearest neighbor and kernel
    def phi(bern):
        # One-dimensional feature: the success probability rounded to two
        # decimals.
        return np.array([round(bern.p(), 2)])

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=sig, phi=phi)

    # solve using Thompson sampling
    obj = objectives.RandomBinaryObjective()
    ts = CorrelatedThompsonSampling(obj, candidates, nn, kernel,
                                    tolerance=eps)
    result = ts.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4

    logging.info('Correlated Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # Format a scalar index (the first maximizer) rather than the whole
    # index array, which relied on implicit array-to-scalar conversion.
    logging.info('Best index %d' % (true_max_indices[0]))

    info = u' (σ=%.1f, ɛ=%.3f)' % (sig, eps)

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Correlated Thompson Sampling' +
              info)

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Correlated Thompson Sampling' +
              info)

    return result
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Grasps are sampled on ``obj``, a brute-force uniform-allocation run gives
    the reference PFC estimates (which are saved via ``save_grasps``), and
    uniform allocation, Thompson sampling and correlated Thompson sampling
    are then compared over ``config['num_trials']`` trials.

    :param obj: graspable object; ``obj.key`` is used for logging and in the
        result.
    :param chunk: not used by this function; kept for interface
        compatibility.
    :param dest: destination forwarded to ``save_grasps``.
    :param config: experiment configuration indexed by string keys.
    :param bool plot: forwarded as ``vis`` to the Gaussian grasp sampler.
    :param bool load: not used by this function; kept for interface
        compatibility.
    :returns: a ``BanditCorrelatedExperimentResult``, or None when no grasps
        were sampled or no trials were requested.
    """
    # sample grasps
    sample_start = time.clock()
    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'], vis=False)

        # pad with gaussian grasps
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(
                obj, target_num_grasps=target_num_grasps,
                check_collisions=config['check_collisions'], vis=plot)
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'], vis=plot,
            grasp_gen_mult=6)
    sample_end = time.clock()
    sample_duration = sample_end - sample_start
    logging.info('Loaded %d grasps' %(len(grasps)))
    logging.info('Grasp candidate loading took %f sec' %(sample_duration))

    if not grasps:
        logging.info('Skipping %s' %(obj.key))
        return None

    # extract load features for all grasps
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' %(len(all_features)))
    logging.info('Grasp feature loading took %f sec' %(feature_duration))

    # bandit params
    num_trials = config['num_trials']
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    brute_snapshot_rate = config['bandit_brute_force_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # set up random variables
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' %len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            # NOTE(review): only grasps with features become candidates, so
            # ``candidates`` can be shorter than ``grasps`` (which is what
            # gets passed to save_grasps below) -- confirm this is intended.
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    # create nn structs for kernels
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua_brute = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_brute_result = ua_brute.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=brute_snapshot_rate)
    final_model = ua_brute_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                                        final_model.betas)
    save_grasps(grasps, estimated_pfc, obj, dest,
                num_successes=final_model.alphas,
                num_failures=final_model.betas)

    # run bandits for several trials
    ua_rewards = []
    ts_rewards = []
    ts_corr_rewards = []

    for t in range(num_trials):
        logging.info('Trial %d' %(t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, estimated_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, estimated_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, estimated_pfc))

    # Without any trial there is no ``ua_result`` and nothing to average;
    # bail out the same way the prior-based variant does.
    if num_trials <= 0:
        return None

    # get the bandit rewards
    all_ua_rewards = np.array(ua_rewards)
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)

    # compute avg normalized rewards
    avg_ua_rewards = np.mean(all_ua_rewards, axis=0)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedExperimentResult(avg_ua_rewards, avg_ts_rewards,
                                            avg_ts_corr_rewards,
                                            estimated_pfc, ua_result.iters,
                                            kernel_matrix, obj_key=obj.key)
def test_window_correlation(width, num_steps, vis=True):
    """Print brute-force PFC estimates and the kernel matrix for test grasps.

    Four parallel-jaw grasps along the y axis at different z offsets are
    created on the ``Co_clean`` test object, their features are computed
    with the given window settings, and the estimated probabilities of force
    closure and the squared-exponential kernel matrix are printed.

    :param width: value stored in ``config['window_width']``.
    :param num_steps: value stored in ``config['window_steps']``.
    :param bool vis: when true, plot the friction cones of every grasp and
        show the figures at the end.
    """
    # scipy.stats must be imported explicitly; ``import scipy`` alone does
    # not guarantee the submodule is loaded.
    import scipy.stats
    import sdf_file
    import obj_file
    import discrete_adaptive_samplers as das
    import experiment_config as ec
    import feature_functions as ff
    import graspable_object as go  # weird Python issues
    import kernels
    import models
    import objectives
    import pfc
    import termination_conditions as tc

    np.random.seed(100)

    mesh_file_name = 'data/test/meshes/Co_clean.obj'
    sdf_3d_file_name = 'data/test/sdf/Co_clean.sdf'

    config = ec.ExperimentConfig('cfg/correlated.yaml')
    config['window_width'] = width
    config['window_steps'] = num_steps
    brute_force_iter = 100
    snapshot_rate = config['bandit_snapshot_rate']

    sdf = sdf_file.SdfFile(sdf_3d_file_name).read()
    mesh = obj_file.ObjFile(mesh_file_name).read()
    graspable = go.GraspableObject3D(sdf, mesh)

    grasp_axis = np.array([0, 1, 0])
    grasp_width = 0.1

    grasps = []
    for z in [-0.030, -0.035, -0.040, -0.045]:
        grasp_center = np.array([0, 0, z])
        # Reach the class through the ``g`` module for both the constructor
        # and the configuration helper; the bare class name is not known to
        # be in scope.
        grasp = g.ParallelJawPtGrasp3D(
            g.ParallelJawPtGrasp3D.configuration_from_params(
                grasp_center, grasp_axis, grasp_width))
        grasps.append(grasp)

    graspable_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(graspable, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)

        if vis:
            # One figure per grasp showing both contact friction cones.
            _, (c1, c2) = grasp.close_fingers(graspable)
            plt.figure()
            c1_proxy = c1.plot_friction_cone(color='m')
            c2_proxy = c2.plot_friction_cone(color='y')
            plt.legend([c1_proxy, c2_proxy], ['Cone 1', 'Cone 2'])
            plt.title('Grasp %d' % (len(candidates)))

    objective = objectives.RandomBinaryObjective()

    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        ua_result.models[-1].alphas, ua_result.models[-1].betas)

    # Parenthesized single-argument prints behave the same on Python 2 and 3.
    print('true pfc')
    print(estimated_pfc)

    def phi(rv):
        return rv.features

    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)

    print('kernel matrix')
    print(kernel.matrix(candidates))

    if vis:
        plt.show()
def label_correlated(obj, chunk, config, plot=False, priors_dataset=None,
                     nearest_features_names=None):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Candidate grasps are loaded for ``obj``, Beta priors are computed from a
    priors dataset (once per entry of ``nearest_features_names``), the
    regression losses of those priors are evaluated, and several bandit
    algorithms are run with and without the priors for
    ``config['num_trials']`` trials.

    :param obj: object to label; ``obj.key`` is stored in the result.
    :param chunk: dataset the candidate grasps are loaded from; also used as
        the priors dataset when ``priors_dataset`` is None.
    :param config: experiment configuration indexed by string keys.
    :param bool plot: not used by this function; kept for interface
        compatibility.
    :param priors_dataset: dataset handed to the prior computation engine.
    :param nearest_features_names: names forwarded one at a time to
        ``compute_priors``. When None, priors are computed once without a
        name.
    :returns: a ``BanditCorrelatedPriorExperimentResult``, or None when no
        candidates could be loaded or ``num_trials`` is 0.
    """
    # bandit params
    num_trials = config['num_trials']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Defined up front so the result can be built even when no named prior
    # is computed (previously a NameError at the return statement).
    neighbor_keys = []
    if nearest_features_names is None:
        # One unnamed prior; the placeholder name keeps the per-prior loop
        # in the trials below iterable.
        prior_names = [None]
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        prior_names = nearest_features_names
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' % (nearest_features_name))
            priors_start_time = time.time()
            (alpha_priors, beta_priors, neighbor_keys, _neighbor_distances,
             _neighbor_kernels, _neighbor_pfc_diffs, _num_grasp_neighbors) = \
                prior_engine.compute_priors(
                    obj, candidates,
                    nearest_features_name=nearest_features_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)

    # The first entry of every list is the baseline for the uninformed prior.
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alpha_arr = np.array(alpha_prior)
        beta_arr = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(alpha_arr,
                                                            beta_arr)
        estimated_vars = models.BetaBernoulliModel.beta_variance(alpha_arr,
                                                                 beta_arr)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alpha_arr, beta_arr))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    # setup reward buffers (one inner list per prior for the prior variants)
    ua_rewards = []
    ts_rewards = []
    gi_rewards = []
    ts_corr_rewards = []
    bucb_corr_rewards = []
    all_ts_corr_prior_rewards = [[] for _ in all_alpha_priors]
    all_bucb_corr_prior_rewards = [[] for _ in all_alpha_priors]

    # setup runtime buffers
    ua_runtimes = []
    ts_runtimes = []
    gi_runtimes = []
    ts_corr_runtimes = []
    bucb_corr_runtimes = []
    all_ts_corr_prior_runtimes = [[] for _ in all_alpha_priors]
    all_bucb_corr_prior_runtimes = [[] for _ in all_alpha_priors]

    # run bandits for several trials
    logging.info('Running bandits')
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Gittins indices
        gi = das.GittinsIndex98(objective, candidates)
        logging.info('Running Gittins Indices.')
        gi_result = gi.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'], p=config['lb_alpha'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Gittins sampler (logged as Bayes UCB), no priors
        bucb_corr = das.CorrelatedGittins(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'], p=config['lb_alpha'])
        logging.info('Running correlated Bayes UCB.')
        bucb_corr_result = bucb_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated MAB with priors for faster convergence
        all_ts_corr_prior_ind = []
        all_bucb_corr_prior_ind = []

        # The per-prior buffers get their own names here: reusing
        # ``ts_corr_runtimes`` / ``bucb_corr_runtimes`` as loop variables
        # rebound the no-prior runtime lists to the last prior's lists.
        per_prior = zip(all_alpha_priors, all_beta_priors,
                        all_ts_corr_prior_rewards,
                        all_bucb_corr_prior_rewards,
                        all_ts_corr_prior_runtimes,
                        all_bucb_corr_prior_runtimes,
                        prior_names)
        for (alpha_priors, beta_priors, ts_corr_prior_rewards,
             bucb_corr_prior_rewards, ts_corr_prior_runtimes,
             bucb_corr_prior_runtimes, nearest_features_name) in per_prior:
            # thompson sampling
            ts_corr_prior = das.CorrelatedThompsonSampling(
                objective, candidates, nn, kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors, beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info(
                'Running correlated Thompson sampling with priors from %s' %
                (nearest_features_name))
            ts_corr_prior_result = ts_corr_prior.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            ts_corr_prior_rewards.append(
                reward_vs_iters(ts_corr_prior_result, true_pfc))
            ts_corr_prior_runtimes.append(ts_corr_prior_result.total_time)
            all_ts_corr_prior_ind.append(ts_corr_prior_result.best_pred_ind)

            # bayes ucb
            bucb_corr_prior = das.CorrelatedGittins(
                objective, candidates, nn, kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors, beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info('Running correlated Bayes UCB with priors from %s' %
                         (nearest_features_name))
            bucb_corr_prior_result = bucb_corr_prior.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            bucb_corr_prior_rewards.append(
                reward_vs_iters(bucb_corr_prior_result, true_pfc))
            bucb_corr_prior_runtimes.append(
                bucb_corr_prior_result.total_time)
            all_bucb_corr_prior_ind.append(
                bucb_corr_prior_result.best_pred_ind)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, true_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, true_pfc))
        gi_rewards.append(reward_vs_iters(gi_result, true_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, true_pfc))
        bucb_corr_rewards.append(reward_vs_iters(bucb_corr_result, true_pfc))

        ua_runtimes.append(ua_result.total_time)
        ts_runtimes.append(ts_result.total_time)
        gi_runtimes.append(gi_result.total_time)
        ts_corr_runtimes.append(ts_corr_result.total_time)
        bucb_corr_runtimes.append(bucb_corr_result.total_time)

    if num_trials == 0:
        return None

    # get the bandit rewards
    all_ua_rewards = np.array(ua_rewards)
    all_ts_rewards = np.array(ts_rewards)
    all_gi_rewards = np.array(gi_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    all_bucb_corr_rewards = np.array(bucb_corr_rewards)

    all_avg_ts_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_ts_corr_prior_rewards]
    all_avg_bucb_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_bucb_corr_prior_rewards]

    # get bandit indices (taken from the last trial)
    ua_ind = ua_result.best_pred_ind
    ts_ind = ts_result.best_pred_ind
    ts_corr_ind = ts_corr_result.best_pred_ind
    bucb_corr_ind = bucb_corr_result.best_pred_ind

    # compute avg normalized rewards
    avg_ua_rewards = np.mean(all_ua_rewards, axis=0)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_gi_rewards = np.mean(all_gi_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)
    avg_bucb_corr_rewards = np.mean(all_bucb_corr_rewards, axis=0)

    # compute avg runtimes
    avg_ua_runtimes = np.mean(np.array(ua_runtimes), axis=0)
    avg_ts_runtimes = np.mean(np.array(ts_runtimes), axis=0)
    avg_ts_corr_runtimes = np.mean(np.array(ts_corr_runtimes), axis=0)
    avg_bucb_corr_runtimes = np.mean(np.array(bucb_corr_runtimes), axis=0)

    all_avg_ts_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_ts_corr_prior_runtimes]
    all_avg_bucb_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_bucb_corr_prior_runtimes]

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedPriorExperimentResult(
        avg_ua_rewards, avg_ts_rewards, avg_gi_rewards, avg_ts_corr_rewards,
        avg_bucb_corr_rewards, all_avg_ts_corr_prior_rewards,
        all_avg_bucb_corr_prior_rewards, true_pfc, ua_result.iters,
        kernel_matrix, [], [], [], ce_vals, ccbp_vals, we_vals,
        len(candidates), total_weights, ua_ind, ts_ind, ts_corr_ind,
        bucb_corr_ind, all_ts_corr_prior_ind, all_bucb_corr_prior_ind,
        avg_ua_runtimes, avg_ts_runtimes, avg_ts_corr_runtimes,
        avg_bucb_corr_runtimes, all_avg_ts_corr_prior_runtimes,
        all_avg_bucb_corr_prior_runtimes, prior_comp_times,
        obj_key=obj.key, neighbor_keys=neighbor_keys)
def eval_hyperparams(obj, chunk, config, plot=False, priors_dataset=None,
                     nearest_features_names=None):
    """Evaluate how well the computed priors predict the stored grasp quality.

    Candidate grasps are loaded for ``obj``, Beta priors are computed from a
    priors dataset (once per entry of ``nearest_features_names``), and the
    cross-entropy, squared-error, weighted squared-error and CCBP
    log-likelihood of each prior are evaluated against the candidates'
    stored quality. No bandits are run.

    :param obj: object to evaluate; ``obj.key`` is stored in the result.
    :param chunk: dataset the candidate grasps are loaded from; also used as
        the priors dataset when ``priors_dataset`` is None.
    :param config: experiment configuration indexed by string keys; the
        values of ``'weight_grad_x'``, ``'weight_gravity'``,
        ``'prior_neighbor_weight'`` and ``'prior_num_neighbors'`` are
        recorded in the result.
    :param bool plot: not used by this function; kept for interface
        compatibility.
    :param priors_dataset: dataset handed to the prior computation engine.
    :param nearest_features_names: names forwarded one at a time to
        ``compute_priors``. When None, priors are computed once without a
        name.
    :returns: a ``HyperparamEvalResult``, or None when no candidates could be
        loaded.
    """
    # Fixed seed kept from the original so the prior computation below sees
    # the same random state as before.
    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Defined up front so the result can be built even when no named prior
    # is computed (previously a NameError at the return statement).
    neighbor_keys = []
    if nearest_features_names is None:
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' % (nearest_features_name))
            priors_start_time = time.time()
            (alpha_priors, beta_priors, neighbor_keys, _neighbor_distances,
             _neighbor_kernels, _neighbor_pfc_diffs, _num_grasp_neighbors) = \
                prior_engine.compute_priors(
                    obj, candidates,
                    nearest_features_name=nearest_features_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)

    # The first entry of every list is the baseline for the uninformed prior.
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alpha_arr = np.array(alpha_prior)
        beta_arr = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(alpha_arr,
                                                            beta_arr)
        estimated_vars = models.BetaBernoulliModel.beta_variance(alpha_arr,
                                                                 beta_arr)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alpha_arr, beta_arr))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    # create hyperparam dict
    num_grasps = len(candidates)
    hyperparams = {
        'weight_grad': config['weight_grad_x'],
        'weight_moment': config['weight_gravity'],
        'weight_shape': config['prior_neighbor_weight'],
        'num_neighbors': config['prior_num_neighbors'],
    }

    return HyperparamEvalResult(ce_vals, se_vals, we_vals, ccbp_vals,
                                num_grasps, total_weights, hyperparams,
                                prior_comp_times, obj_key=obj.key,
                                neighbor_keys=neighbor_keys)
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Debugging driver comparing plain and correlated Thompson sampling on one object.

    With `load=True` (the default) previously saved grasps are read with
    `load_grasps(obj, dest)` and only the first 20 are kept; their stored
    `quality` is used as the reference probability of force closure.  The
    function then plots feature windows for grasps 0 and 1, runs
    `config['num_trials']` rounds of both Thompson-sampling variants, plots
    the averaged rewards, shows the figures and drops into `IPython.embed()`.

    With `load=False` grasps are sampled from scratch, labelled by uniform
    allocation, written with `save_grasps`, a histogram is shown, and the
    process is terminated with `exit(0)`.

    NOTE(review): a second `label_correlated` is defined later in this file;
    if both are module-level, that later definition rebinds the name and this
    one is unreachable by name.
    NOTE(review): `time.clock()` is used for timing throughout; it was removed
    in Python 3.8.

    :param obj: object to label (`obj.key` is used in log messages).
    :param chunk: NOTE(review): rebound to `db.Chunk(config)` on entry, so the
        value passed by the caller is never read.
    :param dest: location handed to `load_grasps` / `save_grasps`.
    :param config: experiment configuration (sampler, bandit, kernel and plot
        settings are read from it by key).
    :param bool plot: forwarded as `vis` to the grasp samplers (load=False only).
    :param bool load: selects between the two modes described above.
    :returns: always None (or does not return when `exit(0)` is reached).
    """
    bandit_start = time.clock()

    np.random.seed(100)
    # the caller-supplied chunk is discarded here
    chunk = db.Chunk(config)

    if not load:
        # sample grasps from scratch instead of loading saved ones
        sample_start = time.clock()

        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)

            # pad with gaussian grasps up to config['min_num_grasps']
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=target_num_grasps,
                    check_collisions=config['check_collisions'],
                    vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj,
                check_collisions=config['check_collisions'],
                vis=plot,
                grasp_gen_mult=6)

        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))

        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None
    else:
        # reuse previously saved grasps; keep only the first 20
        grasps = load_grasps(obj, dest)
        grasps = grasps[:20]

    # grasps = chunk.load_grasps(obj.key)

    # compute features for all grasps (timed)
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    features = feature_extractor.compute_all_features(grasps)
    # The bare string below is disabled code (an alternative that loaded
    # features from disk when load=True); it is evaluated and discarded.
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # drop grasps whose feature entry is None, keeping both lists aligned
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps

    # compute pairwise feature distances for debugging; `distances` is not
    # read again in this function (presumably inspected from the IPython shell)
    distances = np.zeros([len(grasps), len(grasps)])
    i = 0
    for feature_i in all_features:
        j = 0
        for feature_j in all_features:
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)
            j += 1
        i += 1

    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    # note: the loop variable `features` rebinds the list computed above
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        # all_features was already filtered for None above, so the first
        # branch cannot trigger here
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform: maps a candidate RV to the features set on it above
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    if not load:
        # uniform allocation for true values
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)

        save_grasps(grasps, estimated_pfc, obj, dest)

        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']

        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success',
                  fontsize=font_size)
        plt.show()

        # labelling mode stops here: the whole process exits after the plot
        exit(0)
    else:
        # loaded grasps carry their previously estimated quality
        estimated_pfc = np.array([g.quality for g in grasps])

    # debugging for examining bad features: compares grasps 0 and 1, so this
    # indexes out of range if fewer than two grasps survived pruning
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    # grasp_i.close_fingers(obj, vis=True)
    # grasp_j.close_fingers(obj, vis=True)

    # return values are discarded; NOTE(review): presumably called for a
    # visualisation or caching side effect -- confirm
    grasp_i.surface_information(obj, config['window_width'],
                                config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'],
                                config['window_steps'])

    # reshape four flat feature vectors into w-by-w images
    # (assumes each .phi has w*w entries -- TODO confirm)
    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])

    # 2x2 grid of the four windows, all on the same colour scale [-a, a]
    a = 0.1
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(wi1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi1')

    plt.subplot(2, 2, 2)
    plt.imshow(wi2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi2')

    plt.subplot(2, 2, 3)
    plt.imshow(wj1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj1')

    plt.subplot(2, 2, 4)
    plt.imshow(wj2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj2')

    # plt.show()
    # IPython.embed()

    # repeat both bandit variants num_trials times and collect reward curves
    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []

    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result,
                                                    estimated_pfc)

        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)

    # get the bandit rewards, averaged over trials (axis 0 indexes the trial)
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # get correlations and plot: kernel value vs. absolute pfc difference for
    # every pair of candidates
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()

    # pairs whose pfc difference exceeds 1 - kernel value; not read again
    # below (presumably inspected from the IPython shell)
    bad_ind = np.where(pfc_diff > 1.0 - k)

    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    # plt.show()

    # IPython.embed()

    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']

    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # plot the results; the x axes come from the last trial's result objects
    plt.figure()
    plt.plot(ts_result.iters,
             avg_ts_rewards,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters,
             avg_ts_corr_rewards,
             c=u'r',
             linewidth=line_width,
             label='Thompson Sampling (Correlated)')

    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    # open an interactive shell at this point (debugging aid)
    IPython.embed()
    # The bare string below is disabled code (grasp export and result
    # construction); it is evaluated and discarded, so nothing is written.
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)
    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))
    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    # grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    if config['vis_grasps']:
        delay = config['vis_delay']
    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        # rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))
    logging.info('Num grasps: %d' %(len(pr2_grasps)))
    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in zip(pr2_grasps, pr2_grasp_qualities)], f)
    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)
    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward, ua_result, ts_result, ts_corr_result, obj_key=obj.key)
    """
    return None
def label_correlated(obj, chunk, dest, config, plot=False):
    """Run three bandit policies over an object's stored grasps and export the best ones.

    Grasps and their features are loaded from `chunk`, wrapped as
    force-closure random variables, and evaluated with uniform allocation,
    Thompson sampling and correlated Thompson sampling, each capped at
    `config['bandit_max_iter']` iterations.  The best candidates reported by
    the (uncorrelated) Thompson-sampling run are transformed with
    `grasp.transform(obj.tf, theta_res)` and written as JSON to
    `<dest>/<obj.key>.json`.

    :param obj: object to label; `obj.key` and `obj.tf` are read.
    :param chunk: dataset providing `load_grasps(key)` and `name`.
    :param dest: directory the JSON grasp file is written to.
    :param config: experiment configuration. Keys read directly here:
        'bandit_max_iter', 'bandit_snapshot_rate', 'friction_coef',
        'sigma_mu', 'kernel_sigma', 'kernel_l', 'grasp_symmetry',
        'kernel_tolerance', 'grasp_theta_res'. It is also forwarded to the
        feature loader and the random-variable constructors.
    :param bool plot: accepted for interface compatibility; not used here.
    :returns: a `BanditCorrelatedExperimentResult`, or None when the chunk
        holds no grasps for the object.
    """
    # time.clock() was removed in Python 3.8; time.time() works on every
    # interpreter version. Durations are only used in log messages.
    bandit_start = time.time()

    np.random.seed(100)

    # load grasps from database
    sample_start = time.time()
    grasps = chunk.load_grasps(obj.key)
    sample_duration = time.time() - sample_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (sample_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # load features for all grasps
    feature_start = time.time()
    feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    all_features = feature_loader.load_all_features(
        grasps)  # in same order as grasps
    feature_duration = time.time() - feature_start
    logging.info('Loaded %d features' % (len(all_features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # bandit params: only an iteration cap is used as termination condition
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            # grasps without features are left out of the candidate set
            logging.info('Could not compute features for grasp.')
            continue
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    if config['grasp_symmetry']:
        # symmetric variants also compare against the swapped feature vector

        def swapped_phi(rv):
            return rv.swapped_features

        nn = kernels.SymmetricKDTree(phi=phi, alternate_phi=swapped_phi)
        kernel = kernels.SymmetricSquaredExponentialKernel(
            sigma=config['kernel_sigma'],
            l=config['kernel_l'],
            phi=phi,
            alternate_phi=swapped_phi)
    else:
        nn = kernels.KDTree(phi=phi)
        kernel = kernels.SquaredExponentialKernel(
            sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    # pre-computed pfc values
    estimated_pfc = np.array([c.grasp.quality for c in candidates])

    # uniform allocation baseline
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation.')
    ua_result = ua.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # Thompson sampling for faster convergence
    ts = das.ThompsonSampling(objective, candidates)
    logging.info('Running Thompson sampling.')
    ts_result = ts.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # correlated Thompson sampling for even faster convergence
    ts_corr = das.CorrelatedThompsonSampling(
        objective,
        candidates,
        nn,
        kernel,
        tolerance=config['kernel_tolerance'])
    logging.info('Running correlated Thompson sampling.')
    ts_corr_result = ts_corr.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # exported grasps come from the uncorrelated Thompson-sampling run
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.time()
    logging.info('Bandits took %f sec' % (bandit_stop - bandit_start))

    # get rotated, translated versions of grasps; every transformed copy
    # inherits the quality of the grasp it was derived from
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([
            g.to_json(quality=q)
            for g, q in zip(pr2_grasps, pr2_grasp_qualities)
        ], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward,
                                            ts_normalized_reward,
                                            ts_corr_normalized_reward,
                                            estimated_pfc,
                                            ua_result.iters,
                                            kernel.matrix(candidates),
                                            obj_key=obj.key)