def getprobability(object, grasps):
    """Estimate the probability of force closure for each usable grasp.

    The object is loaded from disk, every grasp is wrapped in a force-closure
    random variable, and uniform allocation is run for
    ``config['bandit_brute_force_iter']`` iterations.  The Beta means of the
    final model snapshot are returned.

    Parameters
    ----------
    object : sequence
        ``object[0]`` is a features filename (its ``"_features.txt"`` suffix
        is stripped to form the object key), ``object[1]`` is the path handed
        to ``of.ObjFile`` and ``object[2]`` the path handed to ``sf.SdfFile``.
    grasps : list
        Grasps to evaluate.

    Returns
    -------
    The value of ``models.BetaBernoulliModel.beta_mean`` for the last model
    snapshot of the uniform-allocation run.

    Notes
    -----
    The global NumPy RNG is re-seeded with 100 on every call.
    NOTE(review): grasps whose features are ``None`` are not added to the
    candidate list, so the result presumably has fewer entries than
    ``grasps`` in that case -- confirm callers account for this.
    """
    obj_name = object[1]
    sdf_name = object[2]
    obj_mesh = of.ObjFile(obj_name).read()
    sdf_ = sf.SdfFile(sdf_name).read()
    obj = go.GraspableObject3D(sdf_,
                               mesh=obj_mesh,
                               key=object[0].replace("_features.txt", ""),
                               model_name=obj_name)

    config = ec.ExperimentConfig("cfg/correlated.yaml")
    np.random.seed(100)

    # Only the brute-force settings are needed: this function never runs the
    # bandit comparison, so no termination list, KD-tree or kernel is built.
    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']

    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    # friction Gaussian RV
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        # The random variables are constructed even for grasps that end up
        # skipped, exactly as before, so any RNG use stays in the same order.
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is not None:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua = das.UniformAllocationMean(objective, candidates)
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)

    final_model = ua_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                                        final_model.betas)
    return estimated_pfc
def test_correlated_thompson_sampling(num_candidates=NUM_CANDIDATES, sig=1.0, eps=0.5):
    """Run correlated Thompson sampling on Bernoulli arms and check the winner.

    Candidates are Bernoulli variables whose means are evenly spaced on
    [0, 1].  The solver runs for ``MAX_ITERS`` iterations and must report
    exactly one best candidate whose mean is within 1e-4 of the true maximum.
    The pull counts and value-over-time curves are then plotted.

    Parameters
    ----------
    num_candidates : int
        Number of Bernoulli candidates to create.
    sig : float
        ``sigma`` passed to the squared-exponential kernel.
    eps : float
        ``tolerance`` passed to ``CorrelatedThompsonSampling``.

    Returns
    -------
    The result object returned by ``ts.solve``.
    """
    # get candidates
    actual_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = [BernoulliRV(m) for m in actual_means]

    # get true maximum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)

    # constructing nearest neighbor and kernel
    def phi(bern):
        return np.array([round(bern.p(), 2)])

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=sig, phi=phi)

    # solve using Thompson sampling
    obj = objectives.RandomBinaryObjective()
    ts = CorrelatedThompsonSampling(obj, candidates, nn, kernel, tolerance=eps)
    result = ts.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4

    logging.info('Correlated Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # np.where returns a tuple of index arrays; pick the first index
    # explicitly instead of relying on implicit array -> int conversion,
    # which NumPy deprecates and which fails when the maximum is tied.
    logging.info('Best index %d' % (true_max_indices[0][0]))

    info = u' (σ=%.1f, ɛ=%.3f)' % (sig, eps)

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Correlated Thompson Sampling' + info)

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Correlated Thompson Sampling' + info)

    return result
def __init__(self, feature_db, pca_components=10, feature_vectors=None, svd=None, neighbor_tree=None, neighbor_data=None):
    """Set up the SVD projection and the nearest-neighbor structure.

    Parameters
    ----------
    feature_db :
        Source of feature vectors; only queried (via ``feature_vectors()``)
        when ``feature_vectors`` is not supplied.
    pca_components : int
        Requested number of components.  When the SVD is trained here, the
        stored ``self.pca_components`` is capped at the number of feature
        vectors.
    feature_vectors : dict, optional
        Feature vectors to use instead of the ones in ``feature_db``.
    svd : optional
        Pre-trained SVD; when given, no training takes place.
    neighbor_tree, neighbor_data : optional
        Pre-built tree and its data.  When ``neighbor_tree`` is given they
        are installed directly on a fresh ``kernels.KDTree``.
    """
    # Identity checks: `== None` would dispatch to a user-defined __eq__.
    if feature_vectors is None:
        feature_vectors = feature_db.feature_vectors()

    self.pca_components = pca_components
    if svd is None:
        # NOTE(review): training uses the requested component count; the cap
        # below only affects self.pca_components afterwards -- confirm that
        # _create_train_svd copes with pca_components > number of vectors.
        self.svd = self._create_train_svd(feature_vectors, pca_components)
        data_size = len(feature_vectors.keys())
        if self.pca_components > data_size:
            self.pca_components = data_size
    else:
        self.svd = svd

    if neighbor_tree is None:
        start = time.time()
        data = self._project_feature_vectors(feature_vectors)
        self.neighbors = self._create_neighbors(data)
        end = time.time()
        # Parenthesised single argument prints identically on Python 2 and 3.
        print('TIME: %0.4f' % (end - start))
    else:
        self.neighbors = kernels.KDTree(phi=lambda x: x.feature_vector)
        self.neighbors.tree_ = neighbor_tree
        self.neighbors.data_ = neighbor_data
def _compute_priors_with_neighbor_vectors(self, obj, feature_vector, candidates, neighbor_vector_dict, grasp_transfer_method=0, alpha_prior=1.0, beta_prior=1.0):
    """Compute Beta priors for candidate grasps from neighboring objects.

    For every neighbor object (except ``obj`` itself) the stored grasps and
    features are loaded and transferred, then indexed in a KD-tree.  Each
    candidate's prior starts at ``(alpha_prior, beta_prior)`` and is
    increased by the kernel-weighted successes / failures of every neighbor
    grasp within the grasp kernel's error radius.

    Parameters
    ----------
    obj :
        Object being labelled; ``obj.key`` is used to skip itself.
    feature_vector :
        Feature vector of ``obj`` compared against each neighbor's vector.
    candidates : list
        Candidates exposing ``features`` and ``grasp.quality``.
    neighbor_vector_dict : dict
        Maps neighbor object key -> neighbor feature vector.
    grasp_transfer_method : int
        One of the ``GRASP_TRANSFER_METHOD_*`` constants on ``self``.
    alpha_prior, beta_prior : float
        Base prior parameters for every candidate.

    Returns
    -------
    tuple
        ``(alpha_priors, beta_priors, neighbor_key_list,
        neighbor_distance_list, all_neighbor_kernels,
        all_neighbor_pfc_diffs, num_neighbors)``.

    Notes
    -----
    NOTE(review): ``neighbor_distance_list`` has no entry for ``obj.key``
    while ``neighbor_key_list`` keeps it, so the two are not index-aligned
    when the object is among its own neighbors -- confirm with callers.
    """
    # load in all grasps and features
    logging.info('Loading features...')
    start_time = time.time()

    # transfer features
    logging.info('Using grasp transfer method %d' % (grasp_transfer_method))
    if grasp_transfer_method == self.GRASP_TRANSFER_METHOD_SHOT:
        reg_solver_dict = {}
        for neighbor_key in neighbor_vector_dict:
            # Look the neighbor up first; previously `neighbor_obj` was read
            # here before it was ever assigned, raising NameError.
            neighbor_obj = self.db[neighbor_key]
            reg_solver_dict[neighbor_key] = self._registration_solver(
                obj, neighbor_obj)
        self.reg_solver_dict = reg_solver_dict
    elif grasp_transfer_method == self.GRASP_TRANSFER_METHOD_SCALE_XYZ:
        self._create_feature_scales_xyz(obj, neighbor_vector_dict.keys())
    elif grasp_transfer_method == self.GRASP_TRANSFER_METHOD_SCALE_SINGLE:
        self._create_feature_scales_single(obj, neighbor_vector_dict.keys())
    transfer_time = time.time()
    logging.info('Grasp transfer precomp took %f sec' %
                 (transfer_time - start_time))

    # extract all neighbor properties
    neighbor_key_list = list(neighbor_vector_dict.keys())
    neighbor_index_list = []     # per feature: index of the owning neighbor
    neighbor_feature_list = []   # per feature: transferred feature vector
    neighbor_grasp_list = []     # per feature: source grasp
    neighbor_kernel_list = []    # per feature: object-level kernel value
    neighbor_distance_list = []  # per processed neighbor: feature distance
    for k, neighbor_key in enumerate(neighbor_vector_dict):
        if neighbor_key == obj.key:
            continue
        logging.info('Loading features for %s' % (neighbor_key))
        neighbor_obj = self.db[neighbor_key]
        grasps, neighbor_features = self._load_grasps_and_features(
            neighbor_obj)

        # put stuff in a list
        neighbor_vector = neighbor_vector_dict[neighbor_key]
        neighbor_index_list.extend([k] * len(neighbor_features))
        neighbor_kernel = self.neighbor_kernel.evaluate(
            feature_vector, neighbor_vector)
        neighbor_distance = np.linalg.norm(feature_vector - neighbor_vector)
        logging.info('Distance %f' % (neighbor_distance))
        # neighbors below the tolerance contribute nothing to the priors
        if neighbor_kernel < self.prior_kernel_tolerance:
            neighbor_kernel = 0
        neighbor_kernel_list.extend([neighbor_kernel] * len(neighbor_features))
        neighbor_distance_list.append(neighbor_distance)
        neighbor_grasp_list.extend(grasps)

        # transfer all features
        transferred = [
            self._transfer_features(features, grasp, neighbor_key,
                                    grasp_transfer_method)
            for features, grasp in zip(neighbor_features, grasps)
        ]
        neighbor_feature_list.extend([f.phi for f in transferred])
    lf_time = time.time()
    logging.info('Loading features took %f sec' % (lf_time - transfer_time))

    # create nn struct with neighbor features
    def phi(x):
        return x

    error_radius = self.grasp_kernel.error_radius(
        self.grasp_kernel_tolerance)
    nn = kernels.KDTree(phi=phi)
    nn.train(neighbor_feature_list)
    logging.info('Num total features %d' % (len(neighbor_feature_list)))

    # create priors using the nn struct
    logging.info('Creating priors')
    # time.time() matches the other timers here; time.clock() no longer
    # exists on Python >= 3.8.
    prior_compute_start = time.time()
    # One independent list per neighbor.  `[[]] * n` would alias a single
    # list n times, lumping every neighbor's values together.
    all_neighbor_kernels = [[] for _ in neighbor_key_list]
    all_neighbor_pfc_diffs = [[] for _ in neighbor_key_list]
    alpha_priors = []
    beta_priors = []
    num_neighbors = []
    out_rate = 50
    for k, candidate in enumerate(candidates):
        alpha = alpha_prior
        beta = beta_prior
        if k % out_rate == 0:
            logging.info('Creating priors for candidate %d' % (k))

        # get neighbors within distance and compute kernels
        neighbor_indices, _ = nn.within_distance(candidate.features,
                                                 error_radius,
                                                 return_indices=True)
        num_neighbors.append(len(neighbor_indices))
        for index in neighbor_indices:
            neighbor_grasp = neighbor_grasp_list[index]
            successes = neighbor_grasp.successes
            failures = neighbor_grasp.failures
            object_kernel = neighbor_kernel_list[index]
            grasp_kernel = self.grasp_kernel(candidate.features,
                                             neighbor_feature_list[index])
            kernel_val = object_kernel * grasp_kernel

            owner = neighbor_index_list[index]
            all_neighbor_kernels[owner].append(kernel_val)
            all_neighbor_pfc_diffs[owner].append(
                abs(candidate.grasp.quality - neighbor_grasp.quality))

            alpha += kernel_val * successes
            beta += kernel_val * failures
        alpha_priors.append(alpha)
        beta_priors.append(beta)
    prior_compute_end = time.time()
    logging.info('Created priors in %f sec' %
                 (prior_compute_end - prior_compute_start))

    return alpha_priors, beta_priors, neighbor_key_list, neighbor_distance_list, all_neighbor_kernels, all_neighbor_pfc_diffs, num_neighbors
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Grasps are sampled for ``obj``, their features are computed, a
    brute-force uniform-allocation run estimates the probability of force
    closure (saved through ``save_grasps``), and then uniform allocation,
    Thompson sampling and correlated Thompson sampling are compared over
    ``config['num_trials']`` trials.

    Parameters
    ----------
    obj :
        Object to label; ``obj.key`` identifies it in logs and in the result.
    chunk :
        Not used by this function.
    dest :
        Destination forwarded to ``save_grasps``.
    config : mapping
        Experiment configuration (sampler choice, bandit and kernel settings).
    plot : bool
        Forwarded as ``vis`` to the Gaussian grasp sampler.
    load : bool
        Not used by this function.

    Returns
    -------
    BanditCorrelatedExperimentResult or None
        ``None`` when no grasps could be sampled or when ``num_trials`` is
        not positive (no bandit result exists to report in that case).

    Notes
    -----
    NOTE(review): grasps whose features are ``None`` are left out of
    ``candidates`` while ``save_grasps`` still receives the full ``grasps``
    list -- confirm the two stay aligned.
    """
    # sample grasps (wall-clock timers; time.clock() is gone in Python 3.8+)
    sample_start = time.time()
    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'], vis=False)

        # pad with gaussian grasps
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(
                obj,
                target_num_grasps=target_num_grasps,
                check_collisions=config['check_collisions'],
                vis=plot)
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj,
            check_collisions=config['check_collisions'],
            vis=plot,
            grasp_gen_mult=6)
    sample_end = time.time()
    sample_duration = sample_end - sample_start
    logging.info('Loaded %d grasps' %(len(grasps)))
    logging.info('Grasp candidate loading took %f sec' %(sample_duration))

    if not grasps:
        logging.info('Skipping %s' %(obj.key))
        return None

    # extract load features for all grasps
    feature_start = time.time()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)
    feature_end = time.time()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' %(len(all_features)))
    logging.info('Grasp feature loading took %f sec' %(feature_duration))

    # bandit params
    num_trials = config['num_trials']
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    brute_snapshot_rate = config['bandit_brute_force_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # set up random variables
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    # friction Gaussian RV
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' %len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    # create nn structs for kernels
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua_brute = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_brute_result = ua_brute.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=brute_snapshot_rate)
    final_model = ua_brute_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                                        final_model.betas)
    save_grasps(grasps, estimated_pfc, obj, dest,
                num_successes=final_model.alphas,
                num_failures=final_model.betas)

    # Without at least one trial there is no `ua_result` to report and the
    # reward averages would be taken over empty arrays.
    if num_trials <= 0:
        return None

    # run bandits for several trials
    ua_rewards = []
    ts_rewards = []
    ts_corr_rewards = []
    for t in range(num_trials):
        logging.info('Trial %d' %(t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, estimated_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, estimated_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, estimated_pfc))

    # compute avg normalized rewards across trials
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedExperimentResult(avg_ua_rewards, avg_ts_rewards,
                                            avg_ts_corr_rewards,
                                            estimated_pfc, ua_result.iters,
                                            kernel_matrix, obj_key=obj.key)
def label_correlated(obj, chunk, config, plot=False, priors_dataset=None, nearest_features_names=None):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Candidate grasps are loaded for ``obj``, Beta priors are computed from
    ``priors_dataset`` (once per entry of ``nearest_features_names``), the
    priors are scored against the stored grasp qualities, and several bandit
    solvers are then compared over ``config['num_trials']`` trials, with and
    without the priors.

    Parameters
    ----------
    obj :
        Object to label.
    chunk :
        Dataset the candidates are loaded from; also the priors dataset when
        ``priors_dataset`` is ``None``.
    config : mapping
        Experiment configuration.
    plot : bool
        Not used by this function.
    priors_dataset : optional
        Dataset handed to ``pce.PriorComputationEngine``.
    nearest_features_names : iterable of str, optional
        Names of the feature sets used to find neighbors.  With ``None`` a
        single prior set is computed without naming a feature set.

    Returns
    -------
    BanditCorrelatedPriorExperimentResult or None
        ``None`` when no candidates could be loaded or ``num_trials`` is 0.

    Notes
    -----
    The global NumPy RNG is re-seeded with 100 on every call.
    """
    # bandit params
    num_trials = config['num_trials']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Only the named-feature path reports neighbor keys; start from None so
    # the name is always bound when the result is built.
    # NOTE(review): assumes the result type accepts neighbor_keys=None.
    neighbor_keys = None
    if nearest_features_names is None:
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
        # one unnamed prior set, so the per-prior loops below can still pair
        # each prior with a label
        prior_names = [None]
    else:
        prior_names = nearest_features_names
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' % (nearest_features_name))
            priors_start_time = time.time()
            alpha_priors, beta_priors, neighbor_keys, neighbor_distances, neighbor_kernels, neighbor_pfc_diffs, num_grasp_neighbors = \
                prior_engine.compute_priors(obj, candidates, nearest_features_name=nearest_features_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)

    # The first entry of every series scores the uninformed Beta(1, 1) prior.
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]  # computed but not forwarded to the result
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alpha_arr = np.array(alpha_prior)
        beta_arr = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(alpha_arr, beta_arr)
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            alpha_arr, beta_arr)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alpha_arr, beta_arr))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    # setup reward buffers (one inner list per prior set)
    ua_rewards = []
    ts_rewards = []
    gi_rewards = []
    ts_corr_rewards = []
    bucb_corr_rewards = []
    all_ts_corr_prior_rewards = [[] for _ in all_alpha_priors]
    all_bucb_corr_prior_rewards = [[] for _ in all_alpha_priors]

    # setup runtime buffers (one inner list per prior set)
    ua_runtimes = []
    ts_runtimes = []
    gi_runtimes = []
    ts_corr_runtimes = []
    bucb_corr_runtimes = []
    all_ts_corr_prior_runtimes = [[] for _ in all_alpha_priors]
    all_bucb_corr_prior_runtimes = [[] for _ in all_alpha_priors]

    # run bandits for several trials
    logging.info('Running bandits')
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # Gittins indices
        gi = das.GittinsIndex98(objective, candidates)
        logging.info('Running Gittins Indices.')
        gi_result = gi.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated index policy without priors
        bucb_corr = das.CorrelatedGittins(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])
        logging.info('Running correlated Bayes UCB.')
        bucb_corr_result = bucb_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated MAB for faster convergence, once per prior set.
        # The per-prior buffers get their own names: reusing
        # `ts_corr_runtimes` / `bucb_corr_runtimes` as loop variables would
        # rebind the prior-free runtime buffers filled further down.
        all_ts_corr_prior_ind = []
        all_bucb_corr_prior_ind = []
        for alpha_priors, beta_priors, ts_corr_prior_rewards, bucb_corr_prior_rewards, ts_corr_prior_runtimes, bucb_corr_prior_runtimes, nearest_features_name in \
                zip(all_alpha_priors, all_beta_priors, all_ts_corr_prior_rewards, all_bucb_corr_prior_rewards,
                    all_ts_corr_prior_runtimes, all_bucb_corr_prior_runtimes, prior_names):
            # thompson sampling
            ts_corr_prior = das.CorrelatedThompsonSampling(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info(
                'Running correlated Thompson sampling with priors from %s' %
                (nearest_features_name))
            ts_corr_prior_result = ts_corr_prior.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            ts_corr_prior_rewards.append(
                reward_vs_iters(ts_corr_prior_result, true_pfc))
            ts_corr_prior_runtimes.append(ts_corr_prior_result.total_time)
            all_ts_corr_prior_ind.append(ts_corr_prior_result.best_pred_ind)

            # bayes ucb
            bucb_corr_prior = das.CorrelatedGittins(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info('Running correlated Bayes UCB with priors from %s' %
                         (nearest_features_name))
            bucb_corr_prior_result = bucb_corr_prior.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            bucb_corr_prior_rewards.append(
                reward_vs_iters(bucb_corr_prior_result, true_pfc))
            bucb_corr_prior_runtimes.append(bucb_corr_prior_result.total_time)
            all_bucb_corr_prior_ind.append(
                bucb_corr_prior_result.best_pred_ind)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, true_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, true_pfc))
        gi_rewards.append(reward_vs_iters(gi_result, true_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, true_pfc))
        bucb_corr_rewards.append(reward_vs_iters(bucb_corr_result, true_pfc))

        ua_runtimes.append(ua_result.total_time)
        ts_runtimes.append(ts_result.total_time)
        gi_runtimes.append(gi_result.total_time)
        ts_corr_runtimes.append(ts_corr_result.total_time)
        bucb_corr_runtimes.append(bucb_corr_result.total_time)

    if num_trials == 0:
        return None

    # average the per-prior rewards across trials
    all_avg_ts_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_ts_corr_prior_rewards
    ]
    all_avg_bucb_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_bucb_corr_prior_rewards
    ]

    # get bandit indices (taken from the last trial)
    ua_ind = ua_result.best_pred_ind
    ts_ind = ts_result.best_pred_ind
    ts_corr_ind = ts_corr_result.best_pred_ind
    bucb_corr_ind = bucb_corr_result.best_pred_ind

    # compute avg normalized rewards
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_gi_rewards = np.mean(np.array(gi_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)
    avg_bucb_corr_rewards = np.mean(np.array(bucb_corr_rewards), axis=0)

    # compute avg runtimes
    avg_ua_runtimes = np.mean(np.array(ua_runtimes), axis=0)
    avg_ts_runtimes = np.mean(np.array(ts_runtimes), axis=0)
    avg_ts_corr_runtimes = np.mean(np.array(ts_corr_runtimes), axis=0)
    avg_bucb_corr_runtimes = np.mean(np.array(bucb_corr_runtimes), axis=0)

    all_avg_ts_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_ts_corr_prior_runtimes
    ]
    all_avg_bucb_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_bucb_corr_prior_runtimes
    ]

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedPriorExperimentResult(
        avg_ua_rewards,
        avg_ts_rewards,
        avg_gi_rewards,
        avg_ts_corr_rewards,
        avg_bucb_corr_rewards,
        all_avg_ts_corr_prior_rewards,
        all_avg_bucb_corr_prior_rewards,
        true_pfc,
        ua_result.iters,
        kernel_matrix, [], [], [],
        ce_vals,
        ccbp_vals,
        we_vals,
        len(candidates),
        total_weights,
        ua_ind,
        ts_ind,
        ts_corr_ind,
        bucb_corr_ind,
        all_ts_corr_prior_ind,
        all_bucb_corr_prior_ind,
        avg_ua_runtimes,
        avg_ts_runtimes,
        avg_ts_corr_runtimes,
        avg_bucb_corr_runtimes,
        all_avg_ts_corr_prior_runtimes,
        all_avg_bucb_corr_prior_runtimes,
        prior_comp_times,
        obj_key=obj.key,
        neighbor_keys=neighbor_keys)
def eval_hyperparams(obj, chunk, config, plot=False, priors_dataset=None, nearest_features_names=None):
    """Score the priors produced by the current hyperparameters for an object.

    Candidate grasps are loaded for ``obj``, Beta priors are computed from
    ``priors_dataset`` (once per entry of ``nearest_features_names``), and
    each prior set is compared against the stored grasp qualities using
    cross-entropy, squared-error, weighted squared-error and CCBP
    log-likelihood.  No bandit is run.

    Parameters
    ----------
    obj :
        Object to evaluate.
    chunk :
        Dataset the candidates are loaded from; also the priors dataset when
        ``priors_dataset`` is ``None``.
    config : mapping
        Experiment configuration; the ``weight_grad_x``, ``weight_gravity``,
        ``prior_neighbor_weight`` and ``prior_num_neighbors`` entries are
        recorded in the result.
    plot : bool
        Not used by this function.
    priors_dataset : optional
        Dataset handed to ``pce.PriorComputationEngine``.
    nearest_features_names : iterable of str, optional
        Names of the feature sets used to find neighbors.  With ``None`` a
        single prior set is computed without naming a feature set.

    Returns
    -------
    HyperparamEvalResult or None
        ``None`` when no candidates could be loaded.

    Notes
    -----
    The global NumPy RNG is re-seeded with 100 on every call.
    """
    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Only the named-feature path reports neighbor keys; start from None so
    # the name is always bound when the result is built.
    # NOTE(review): assumes HyperparamEvalResult accepts neighbor_keys=None.
    neighbor_keys = None
    if nearest_features_names is None:
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' % (nearest_features_name))
            priors_start_time = time.time()
            alpha_priors, beta_priors, neighbor_keys, neighbor_distances, neighbor_kernels, neighbor_pfc_diffs, num_grasp_neighbors = \
                prior_engine.compute_priors(obj, candidates, nearest_features_name=nearest_features_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)

    # The first entry of every series scores the uninformed Beta(1, 1) prior.
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alpha_arr = np.array(alpha_prior)
        beta_arr = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(alpha_arr, beta_arr)
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            alpha_arr, beta_arr)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alpha_arr, beta_arr))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    # create hyperparam dict
    num_grasps = len(candidates)
    hyperparams = {
        'weight_grad': config['weight_grad_x'],
        'weight_moment': config['weight_gravity'],
        'weight_shape': config['prior_neighbor_weight'],
        'num_neighbors': config['prior_num_neighbors'],
    }

    return HyperparamEvalResult(ce_vals,
                                se_vals,
                                we_vals,
                                ccbp_vals,
                                num_grasps,
                                total_weights,
                                hyperparams,
                                prior_comp_times,
                                obj_key=obj.key,
                                neighbor_keys=neighbor_keys)
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Debugging variant of correlated-bandit grasp labeling for one object.

    Two modes, selected by ``load``:

    * ``load`` false: sample grasps for ``obj``, estimate their probability of
      force closure by uniform allocation, persist them with ``save_grasps``,
      show a histogram and then terminate the process via ``exit(0)``.
    * ``load`` true (default): read previously saved grasps with
      ``load_grasps`` (only the first 20 are kept), run ``num_trials`` rounds
      of plain and correlated Thompson sampling, plot diagnostics and finally
      call ``IPython.embed()``.

    Args:
        obj: Object whose grasps are sampled / loaded.
        chunk: Ignored -- it is rebound to ``db.Chunk(config)`` on entry.
        dest: Location handed to ``load_grasps`` / ``save_grasps``.
        config: Mapping-like experiment configuration.
        plot: Forwarded as ``vis`` to the grasp samplers.
        load: Chooses between the two modes described above.

    Returns:
        None. (Also None, early, when sampling produced no grasps.)
    """
    bandit_start = time.clock()
    # fixed seed so repeated runs draw the same samples
    np.random.seed(100)
    # NOTE(review): the ``chunk`` argument is discarded here and the new
    # value is only mentioned in disabled code below -- confirm intent.
    chunk = db.Chunk(config)

    if not load:
        # sample fresh grasps
        sample_start = time.clock()

        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)

            # pad with gaussian grasps up to config['min_num_grasps']
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=target_num_grasps,
                    check_collisions=config['check_collisions'],
                    vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj,
                check_collisions=config['check_collisions'],
                vis=plot,
                grasp_gen_mult=6)

        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))

        # nothing to label for this object
        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None
    else:
        grasps = load_grasps(obj, dest)
        # only the first 20 saved grasps are used in this debugging variant
        grasps = grasps[:20]
        # grasps = chunk.load_grasps(obj.key)

    # compute features for all grasps
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    features = feature_extractor.compute_all_features(grasps)
    # The string below is disabled code (a no-op expression), kept as found.
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # prune grasps whose features are None, keeping both lists aligned
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps

    # compute pairwise feature distances for debugging
    # (``distances`` is not read again in this function)
    distances = np.zeros([len(grasps), len(grasps)])
    i = 0
    for feature_i in all_features:
        j = 0
        for feature_j in all_features:
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)
            j += 1
        i += 1

    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        # None features were already pruned above, so the first branch is not
        # expected to be taken here.
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    if not load:
        # uniform allocation for true values
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        # Beta mean of the last model snapshot is used as the pfc estimate
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)

        save_grasps(grasps, estimated_pfc, obj, dest)

        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']

        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success',
                  fontsize=font_size)
        plt.show()
        # NOTE(review): this ends the whole process, so nothing below runs
        # when ``load`` is false.
        exit(0)
    else:
        # qualities stored on the loaded grasps serve as the reference pfc
        estimated_pfc = np.array([g.quality for g in grasps])

    # debugging for examining bad features
    # NOTE(review): indices 0 and 1 are hard-coded, so this needs at least
    # two grasps to survive the pruning above.
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    # grasp_i.close_fingers(obj, vis=True)
    # grasp_j.close_fingers(obj, vis=True)

    grasp_i.surface_information(obj, config['window_width'],
                                config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'],
                                config['window_steps'])

    # reshape four nested extractor outputs into w-by-w images
    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])

    a = 0.1
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(wi1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi1')

    plt.subplot(2, 2, 2)
    plt.imshow(wi2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi2')

    plt.subplot(2, 2, 3)
    plt.imshow(wj1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj1')

    plt.subplot(2, 2, 4)
    plt.imshow(wj2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj2')
    # plt.show()
    # IPython.embed()

    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # one reward curve per trial and per sampler
        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result,
                                                    estimated_pfc)
        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)

    # average the reward curves over trials
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # get correlations and plot: kernel value vs. pairwise pfc difference
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()

    # (``bad_ind`` is not read again in this function)
    bad_ind = np.where(pfc_diff > 1.0 - k)

    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    # plt.show()
    # IPython.embed()

    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']

    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # plot the results
    # NOTE(review): ts_result / ts_corr_result come from the last trial, so
    # this section needs config['num_trials'] >= 1.
    plt.figure()
    plt.plot(ts_result.iters,
             avg_ts_rewards,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters,
             avg_ts_corr_rewards,
             c=u'r',
             linewidth=line_width,
             label='Thompson Sampling (Correlated)')

    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    IPython.embed()
    # The string below is disabled code (a no-op expression), kept as found.
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    # grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])

    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        # rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' %(len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in zip(pr2_grasps, pr2_grasp_qualities)], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward,
                                            ua_result, ts_result, ts_corr_result, obj_key=obj.key)
    """
    return None
def label_correlated(obj, chunk, dest, config, plot=False):
    """Compare three bandit samplers on an object's stored grasps.

    Grasps and their features are read from ``chunk``; each grasp with
    features becomes a force-closure random variable. Uniform allocation,
    Thompson sampling and correlated Thompson sampling are then run in that
    order. The best grasps reported by plain Thompson sampling are
    transformed with ``obj.tf`` and dumped, with their predicted qualities, to
    ``<dest>/<obj.key>.json``.

    Args:
        obj: Object to label (``key`` and ``tf`` are read).
        chunk: Dataset chunk providing ``load_grasps`` and ``name``.
        dest: Directory the JSON file is written to.
        config: Mapping-like experiment configuration.
        plot: Not used by this function.

    Returns:
        None when no grasps were loaded, otherwise a
        ``BanditCorrelatedExperimentResult``.
    """
    t_bandit_begin = time.clock()
    np.random.seed(100)

    # --- grasps --------------------------------------------------------------
    t_load_begin = time.clock()
    grasps = chunk.load_grasps(obj.key)
    load_secs = time.clock() - t_load_begin
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (load_secs))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # --- features (returned in the same order as the grasps) -----------------
    t_feat_begin = time.clock()
    loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    all_features = loader.load_all_features(grasps)
    feat_secs = time.clock() - t_feat_begin
    logging.info('Loaded %d features' % (len(all_features)))
    logging.info('Grasp feature loading took %f sec' % (feat_secs))

    # --- bandit settings (the first and third are read but unused below) -----
    _brute_force_iter = config['bandit_brute_force_iter']
    iteration_cap = config['bandit_max_iter']
    _confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    stop_conditions = [tc.MaxIterTerminationCondition(iteration_cap)]

    # --- random variables, one per grasp that has features -------------------
    object_pose_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])

    candidates = []
    for one_grasp, one_features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        jaw_rv = pfc.ParallelJawGraspGaussian(one_grasp, config)
        closure_rv = pfc.ForceClosureRV(jaw_rv, object_pose_rv, friction_rv,
                                        config)
        if features_missing(one_features):
            logging.info('Could not compute features for grasp.')
            continue
        closure_rv.set_features(one_features)
        candidates.append(closure_rv)

    # --- neighbour structure and kernel over the feature transform -----------
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)

    if config['grasp_symmetry']:
        # swap in the symmetric versions, which also see the swapped features
        def swapped_phi(rv):
            return rv.swapped_features

        nn = kernels.SymmetricKDTree(phi=phi, alternate_phi=swapped_phi)
        kernel = kernels.SymmetricSquaredExponentialKernel(
            sigma=config['kernel_sigma'],
            l=config['kernel_l'],
            phi=phi,
            alternate_phi=swapped_phi)

    objective = objectives.RandomBinaryObjective()

    # qualities already stored on the grasps
    estimated_pfc = np.array([rv.grasp.quality for rv in candidates])

    def run(sampler):
        # every solver gets its own freshly built termination condition
        return sampler.solve(
            termination_condition=tc.OrTerminationCondition(stop_conditions),
            snapshot_rate=snapshot_rate)

    # --- the three samplers, in a fixed order ---------------------------------
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation.')
    ua_result = run(ua)

    ts = das.ThompsonSampling(objective, candidates)
    logging.info('Running Thompson sampling.')
    ts_result = run(ts)

    ts_corr = das.CorrelatedThompsonSampling(
        objective, candidates, nn, kernel,
        tolerance=config['kernel_tolerance'])
    logging.info('Running correlated Thompson sampling.')
    ts_corr_result = run(ts_corr)

    best_grasps = [candidates[idx].grasp for idx in ts_result.best_candidates]
    best_qualities = list(ts_result.best_pred_means)

    logging.info('Bandits took %f sec' % (time.clock() - t_bandit_begin))

    # --- rotated, translated versions of the best grasps ----------------------
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    # only feeds a collision-pruning step that is currently disabled
    delay = config['vis_delay'] if config['vis_grasps'] else 0

    for best_grasp, quality in zip(best_grasps, best_qualities):
        variants = best_grasp.transform(obj.tf, theta_res)
        pr2_grasps.extend(variants)
        pr2_grasp_qualities.extend([quality] * len(variants))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    out_path = os.path.join(dest, obj.key + '.json')
    with open(out_path, 'w') as out_file:
        payload = [
            g.to_json(quality=q)
            for g, q in zip(pr2_grasps, pr2_grasp_qualities)
        ]
        jsons.dump(payload, out_file)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(
        ua_normalized_reward,
        ts_normalized_reward,
        ts_corr_normalized_reward,
        estimated_pfc,
        ua_result.iters,
        kernel.matrix(candidates),
        obj_key=obj.key)


def features_missing(features):
    """Return True when no features are available for a grasp."""
    return features is None
def __init__(self, config):
    """Set up the feature extractor and the descriptor index from ``config``.

    A ``CNNBatchFeatureExtractor`` is built from ``config``, a KD-tree keyed
    on each item's ``descriptor`` attribute is created, ``config`` is parsed,
    and the KD-tree is handed to the ``Hdf5DatabaseIndexer`` initialiser.
    """
    def descriptor_of(item):
        # feature transform used by the KD-tree
        return item.descriptor

    self.feature_extractor_ = CNNBatchFeatureExtractor(config)
    descriptor_index = kernels.KDTree(phi=descriptor_of)
    self._parse_config(config)
    di.Hdf5DatabaseIndexer.__init__(self, descriptor_index)
def _create_neighbors(self, feature_vectors):
    """Build and train a KD-tree over the given feature vectors.

    Args:
        feature_vectors: Mapping whose (key, value) pairs are each wrapped as
            ``FeatureObject(key, value)``.

    Returns:
        A ``kernels.KDTree`` keyed on each object's ``feature_vector``
        attribute, after ``train`` has been called on the wrapped objects.
    """
    # Build a real list: on Python 3 ``map`` is a one-shot lazy iterator, so
    # ``train`` would not get a sequence it can size or walk more than once.
    feature_objects = [
        FeatureObject(key, vector) for key, vector in feature_vectors.items()
    ]
    neighbors = kernels.KDTree(phi=lambda x: x.feature_vector)
    neighbors.train(feature_objects)
    return neighbors