def getprobability(object, grasps):
    """Estimate the probability of force closure (PFC) for each grasp.

    Runs brute-force uniform allocation over force-closure random
    variables and returns the posterior Beta means as PFC estimates.

    Params:
        object: sequence of (feature filename, obj filename, sdf filename).
            NOTE(review): the parameter name shadows the `object` builtin;
            kept unchanged for backward compatibility with callers.
        grasps: list of grasp candidates to evaluate.
    Returns:
        array of estimated PFC values, one per candidate with features.
    """
    obj_name = object[1]
    sdf_name = object[2]
    obj_mesh = of.ObjFile(obj_name).read()
    sdf_ = sf.SdfFile(sdf_name).read()
    obj = go.GraspableObject3D(sdf_,
                               mesh=obj_mesh,
                               key=object[0].replace("_features.txt", ""),
                               model_name=obj_name)

    config_name = "cfg/correlated.yaml"
    config = ec.ExperimentConfig(config_name)
    np.random.seed(100)

    # bandit params (dead locals max_iter / confidence / tc_list and the
    # unused KDTree / SE-kernel construction were removed; only uniform
    # allocation is run here)
    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']

    # random variables modeling pose and friction uncertainty
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            # grasp without features is skipped (original `pass` branch)
            pass
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua = das.UniformAllocationMean(objective, candidates)
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        ua_result.models[-1].alphas, ua_result.models[-1].betas)
    return estimated_pfc
def solve(
        self,
        termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
        snapshot_rate=1):
    """Run discrete maximization over the full stored candidate set.

    Convenience wrapper: forwards every candidate in self.candidates_
    to discrete_maximize with the supplied termination condition and
    snapshot rate, returning its result.
    """
    all_candidates = self.candidates_
    return self.discrete_maximize(all_candidates, termination_condition,
                                  snapshot_rate)
def test_grad_ascent():
    """Gradient ascent should recover the least-squares solution of Ax = b."""
    np.random.seed(100)

    # init vars
    x_dim = int(10)
    b_dim = int(5)
    A = np.random.rand(b_dim, x_dim)
    b = np.random.rand(b_dim)
    x_0 = np.random.rand(x_dim)
    objective = objectives.MinimizationObjective(
        objectives.LeastSquaresObjective(A, b))

    # get actual solution via the normal equations
    try:
        true_best_x = np.linalg.solve(A.T.dot(A), A.T.dot(b))
    except np.linalg.LinAlgError:
        # BUG FIX: previously the error was only logged, so execution fell
        # through to the next line and raised a NameError on the undefined
        # true_best_x; re-raise so the failure is explicit. (Also fixed the
        # 'ws' typo in the message.)
        logging.error('A transpose A was not invertible!')
        raise
    true_best_f = objective(true_best_x)

    # run gradient ascent
    step_policy = BacktrackingLSPolicy()
    optimizer = UnconstrainedGradientAscent(objective, step_policy)
    result = optimizer.solve(
        termination_condition=tc.MaxIterTerminationCondition(100),
        snapshot_rate=1,
        start_x=x_0,
        true_x=true_best_x)
    assert (np.abs(np.linalg.norm(result.best_f - true_best_f)) < 1e-2)

    logging.info('Val at true best x: %f' % (true_best_f))
    logging.info('Val at estimated best x: %f' % (result.best_f))
    plot_value_vs_time_gradient(result, true_best_f,
                                'Least squares grad ascent')
def test_gittins_indices_98(num_candidates=NUM_CANDIDATES):
    """Exercise the Gittins-index (1998) policy on random Bernoulli arms."""
    # build random Bernoulli candidates
    np.random.seed(1000)
    pred_means = np.random.rand(num_candidates)
    candidates = [BernoulliRV(p) for p in pred_means]

    # ground-truth optimum
    true_max = np.max(pred_means)
    true_max_indices = np.where(pred_means == true_max)

    # solve with the Gittins-index policy
    obj = objectives.RandomBinaryObjective()
    solver = GittinsIndex98(obj, candidates)
    result = solver.solve(termination_condition=tc.MaxIterTerminationCondition(
        MAX_ITERS * 10), snapshot_rate=SNAPSHOT_RATE)  # NOTE: needs more iters on this problem

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Gittins Indices test passed!')
    logging.info('Took %f sec' % (result.total_time))
    logging.info('Best index %d' % (true_max_indices[0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Gittins Indices 98')
    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Gittins Indices 98')
    return result
def test_gaussian_ucb(num_candidates=NUM_CANDIDATES):
    """Exercise GP-UCB sampling on random Bernoulli arms."""
    # build random Bernoulli candidates
    np.random.seed(1000)
    actual_means = np.random.rand(num_candidates)
    candidates = []
    for mean in actual_means:
        candidates.append(BernoulliRV(mean))

    # ground-truth optimum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)

    # solve using GP-UCB
    obj = objectives.RandomBinaryObjective()
    sampler = GaussianUCBSampling(obj, candidates)
    result = sampler.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Gaussian UCB sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    logging.info('Best index %d' % (true_max_indices[0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Gaussian UCB Sampling')
    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Gaussian UCB Sampling')
    return result
def solve(
        self,
        termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
        snapshot_rate=1):
    """Solves for the maximal / minimal point.

    Abstract hook: concrete solvers must override this.

    Raises:
        NotImplementedError: always. The previous stub was `pass`, which
        silently returned None and masked missing overrides in subclasses.
    """
    raise NotImplementedError('Subclasses must implement solve()')
def run_ua_on(obj, config):
    """Sample grasps for `obj` and estimate their PFC by uniform allocation.

    Samples grasp candidates (antipodal with Gaussian padding, or pure
    Gaussian, per config), wraps each in a force-closure random variable,
    and runs brute-force uniform-allocation bandits to estimate quality.
    Returns the AdaptiveSamplingResult of the uniform-allocation run.
    """
    # sample grasps
    sample_start = time.clock()  # NOTE(review): never read afterwards
    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])

        # pad with gaussian grasps when antipodal sampling under-delivers
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(
                obj,
                target_num_grasps=target_num_grasps,
                check_collisions=config['check_collisions'])
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])

    # generate pfc candidates: one force-closure RV per sampled grasp
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])
    candidates = []
    for grasp in grasps:
        logging.info('Adding grasp %d candidate' % (len(candidates)))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        candidates.append(pfc_rv)
    logging.info('%d candidates', len(candidates))

    # total pulls scale with the number of candidates; snapshot only once,
    # at the very end (snapshot_rate == total iterations)
    brute_force_iter = config['bandit_brute_force_iter'] * len(candidates)
    snapshot_rate = brute_force_iter
    objective = objectives.RandomBinaryObjective()
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    bandit_start = time.clock()
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    bandit_end = time.clock()
    bandit_duration = bandit_end - bandit_start
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (brute_force_iter, bandit_duration))
    return ua_result
def test_stochastic_grad_ascent_logistic():
    """Fit logistic regression by SGA and compare against scikit-learn.

    Draws two Gaussian clusters (one per class), fits a stochastic
    logistic cross-entropy objective by gradient ascent, and plots the
    objective value over time against the scikit-learn optimum.
    """
    # NOTE(review): the RNG-call order below fixes the dataset for the
    # given seed; do not reorder the sampling statements.
    np.random.seed(100)

    # two gaussian clusters, one per class label
    num_from_zero = 100
    zero_mean, zero_cov = np.random.rand(2), 3e-2 * np.diag(np.random.rand(2))
    num_from_one = 100
    one_mean, one_cov = np.random.rand(2), 3e-2 * np.diag(np.random.rand(2))
    from_zero = np.random.multivariate_normal(zero_mean, zero_cov,
                                              num_from_zero)
    from_one = np.random.multivariate_normal(one_mean, one_cov, num_from_one)
    # design matrix with a leading all-ones column for the intercept term
    X = np.hstack([
        np.ones((num_from_zero + num_from_one, 1)),
        np.vstack([from_zero, from_one])
    ])
    y = np.hstack([np.zeros(num_from_zero), np.ones(num_from_one)])
    objective = objectives.MinimizationObjective(
        objectives.StochasticLogisticCrossEntropyObjective(X, y,
                                                           batch_size=50))

    # get ground truth solution from scikit-learn
    from sklearn.linear_model import LogisticRegression
    solver = LogisticRegression(fit_intercept=False)
    solver.fit(X, y)
    solution = solver.coef_.squeeze()
    solver_score = sum(solver.predict(X) == y)  # NOTE(review): unused
    true_best_f = objective(solution)
    logging.info('scikit-learn solution: %s', solution)
    logging.info('scikit-learn score: %f', true_best_f)
    logging.info('scikit-learn iterations: %d', solver.n_iter_)

    # run gradient ascent from a random 3-d start (intercept + 2 features)
    step_policy = DecayingStepPolicy(1)
    optimizer = UnconstrainedGradientAscent(objective, step_policy)
    result = optimizer.solve(
        termination_condition=tc.MaxIterTerminationCondition(100),
        snapshot_rate=1,
        start_x=np.random.rand(3),
        true_x=solution)
    logging.info('our solution: %s', result.best_x)
    logging.info('our score: %f', result.best_f)
    logging.info('Val at true best x: %f' % (true_best_f))
    logging.info('Val at estimated best x: %f' % (result.best_f))
    plot_value_vs_time_gradient(result, true_best_f, 'Logistic SGA')
def test_correlated_thompson_sampling(num_candidates=NUM_CANDIDATES,
                                      sig=1.0,
                                      eps=0.5):
    """Exercise correlated Thompson sampling on evenly spaced Bernoulli arms."""
    # evenly spaced success probabilities on [0, 1]
    actual_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = []
    for mean in actual_means:
        candidates.append(BernoulliRV(mean))

    # ground-truth optimum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)

    # feature map, nearest-neighbor structure, and correlation kernel
    def phi(bern):
        return np.array([round(bern.p(), 2)])

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=sig, phi=phi)

    # solve with correlated Thompson sampling
    obj = objectives.RandomBinaryObjective()
    sampler = CorrelatedThompsonSampling(obj, candidates, nn, kernel,
                                         tolerance=eps)
    result = sampler.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Correlated Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    logging.info('Best index %d' % (true_max_indices[0]))

    info = u' (σ=%.1f, ɛ=%.3f)' % (sig, eps)

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Correlated Thompson Sampling' +
              info)
    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Correlated Thompson Sampling' +
              info)
    return result
def top_K_solve(
        self,
        K,
        termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
        snapshot_rate=1):
    """Solves for the top K maximal / minimal points.

    Partitions the candidate space into K bins (the full candidate set
    when K == 1) and runs discrete maximization independently on each
    bin, returning the list of K per-bin results.
    """
    # partition the input space
    candidate_bins = [self.candidates_] if K == 1 else self.partition(K)

    # maximize over each bin independently
    return [
        self.discrete_maximize(bin_candidates, termination_condition,
                               snapshot_rate)
        for bin_candidates in candidate_bins
    ]
def test_thompson_sampling(num_candidates=NUM_CANDIDATES, random=False):
    """Exercise standard Thompson sampling on Bernoulli arms.

    With random=True the arm means are drawn uniformly at random;
    otherwise they are evenly spaced on [0, 1].
    """
    np.random.seed(1000)
    if random:
        pred_means = np.random.rand(num_candidates)
    else:
        pred_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = [BernoulliRV(p) for p in pred_means]

    # ground-truth optimum
    true_max = np.max(pred_means)
    true_max_indices = np.where(pred_means == true_max)

    # solve with Thompson sampling
    obj = objectives.RandomBinaryObjective()
    sampler = ThompsonSampling(obj, candidates)
    result = sampler.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    logging.info('Best index %d' % (true_max_indices[0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Thompson Sampling')
    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Thompson Sampling')
    return result
def expected_quality(grasp_rv, graspable_rv, params_rv, quality_config):
    """Estimate the mean and spread of grasp quality under uncertainty.

    Draws num_quality_samples Monte-Carlo evaluations of the quasi-static
    quality metric via uniform allocation, and returns the final model's
    sample mean and sample variance estimate for the single candidate.
    """
    # quality random variable induced by grasp / object / param uncertainty
    quality_rv = QuasiStaticGraspQualityRV(grasp_rv, graspable_rv, params_rv,
                                           quality_config)

    # brute force with uniform allocation over the single candidate
    snapshot_rate = quality_config['sampling_snapshot_rate']
    num_samples = quality_config['num_quality_samples']
    objective = objectives.RandomContinuousObjective()
    sampler = das.GaussianUniformAllocationMean(objective, [quality_rv])
    result = sampler.solve(
        termination_condition=tc.MaxIterTerminationCondition(num_samples),
        snapshot_rate=snapshot_rate)

    # convert to estimated prob success
    final_model = result.models[-1]
    return final_model.means[0], final_model.sample_vars[0]
def label_pfc(obj, dataset, output_dir, config):
    """ Label an object with grasps according to probability of force closure """
    # NOTE(review): `dataset` is unused in the visible body — confirm intent.
    # sample intial antipodal grasps
    start = time.clock()
    sampler = ags.AntipodalGraspSampler(config)
    start_time = time.clock()
    grasps, alpha_thresh, rho_thresh = sampler.generate_grasps(obj, vis=False)
    end_time = time.clock()
    duration = end_time - start_time
    logging.info('Antipodal grasp candidate generation took %f sec' %
                 (duration))

    # partition grasps into spatial bins; bandits run per partition
    grasp_partitions = pfc.space_partition_grasps(grasps, config)

    # bandit params
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    # stop on iteration cap OR confidence threshold, whichever first
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        tc.ConfidenceTerminationCondition(confidence)
    ]

    # run bandits on each partition
    object_grasps = []
    grasp_qualities = []
    i = 0
    for grasp_partition in grasp_partitions:
        logging.info('Finding highest quality grasp in partition %d' % (i))

        # create random variables for pose and friction uncertainty
        graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
        f_rv = scipy.stats.norm(
            config['friction_coef'],
            config['sigma_mu'])  # friction gaussian random variable
        candidates = []
        for grasp in grasp_partition:
            grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
            candidates.append(
                pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config))

        # run bandits: Thompson sampling per partition
        objective = objectives.RandomBinaryObjective()
        ts = das.ThompsonSampling(objective, candidates)
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        object_grasps.extend([c.grasp for c in ts_result.best_candidates])
        grasp_qualities.extend(list(ts_result.best_pred_means))
        i = i + 1

    stop = time.clock()
    logging.info('Took %d sec' % (stop - start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    i = 0
    if config['vis_grasps']:
        delay = config['vis_delay']
    for grasp in object_grasps:
        print 'Grasp', i
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        # drop rotations that collide with the object in OpenRave
        rotated_grasps = grasp_checker.prune_grasps_in_collision(
            obj,
            rotated_grasps,
            auto_step=True,
            close_fingers=False,
            delay=delay)
        pr2_grasps.extend(rotated_grasps)
        # replicate the source grasp's quality for every surviving rotation
        pr2_grasp_qualities.extend([grasp_qualities[i]] * len(rotated_grasps))
        i = i + 1

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    # save grasps locally :( Due to problems with sudo
    grasp_filename = os.path.join(output_dir, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([
            pr2_grasps[i].to_json(quality=pr2_grasp_qualities[i])
            for i in range(len(pr2_grasps))
        ], f)
def test_ua_vs_thompson(num_trials=20,
                        num_candidates=1000,
                        brute_iters=50000,
                        max_iters=5000,
                        snapshot_rate=20):
    """Compare uniform allocation vs Thompson sampling over many trials.

    Runs both solvers on the same random Bernoulli arms per trial,
    aggregates normalized rewards w.r.t. both true and estimated PFC,
    and plots histograms / reward curves.

    Returns the final trial's Thompson-sampling result (made explicit;
    the original `return result` relied on a leaked loop variable).
    """
    np.random.seed(1000)
    prior_dist = 'gaussian'

    # iterate through trials
    ua_results = []
    ts_results = []
    true_pfcs = []
    est_pfcs = []
    # BUG FIX: the trial loop previously reused `i`, which the inner
    # candidate loop clobbered; use a distinct name.
    for trial in range(num_trials):
        logging.info('Trial %d' % (trial))

        # generate random candidates, clipped to [0, 1]
        if prior_dist == 'gaussian':
            true_pfc = scipy.stats.norm.rvs(loc=0.5,
                                            scale=0.1,
                                            size=num_candidates)
            true_pfc[true_pfc < 0] = 0
            true_pfc[true_pfc > 1] = 1
        else:
            true_pfc = np.random.rand(num_candidates)
        candidates = [BernoulliRV(p) for p in true_pfc]

        # get true maximum
        true_max = np.max(true_pfc)
        true_max_indices = np.where(true_pfc == true_max)

        # solve using uniform allocation, then Thompson sampling
        obj = objectives.RandomBinaryObjective()
        ua = UniformAllocationMean(obj, candidates)
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(brute_iters),
            snapshot_rate=snapshot_rate)
        ts = ThompsonSampling(obj, candidates)
        ts_result = ts.solve(
            termination_condition=tc.MaxIterTerminationCondition(max_iters),
            snapshot_rate=snapshot_rate)

        # check result (not guaranteed to work in finite iterations but whatever)
        logging.info('UA took %f sec' % (ua_result.total_time))
        logging.info('UA best index %d' % (true_max_indices[0][0]))
        logging.info('TS took %f sec' % (ts_result.total_time))
        logging.info('TS best index %d' % (true_max_indices[0][0]))

        brute_model = ua_result.models[-1]
        true_pfcs.append(true_pfc)
        est_pfcs.append(
            models.BetaBernoulliModel.beta_mean(brute_model.alphas,
                                                brute_model.betas))
        ua_results.append(ua_result)
        ts_results.append(ts_result)

    # aggregate results wrt truth
    all_ua_norm_rewards = np.zeros([len(ua_results), len(ua_results[0].iters)])
    all_ts_norm_rewards = np.zeros([len(ts_results), len(ts_results[0].iters)])
    for j, (true_pfc, result) in enumerate(zip(true_pfcs, ua_results)):
        best_pfc = np.max(true_pfc)
        ua_pred_values = np.array(
            [true_pfc[m.best_pred_ind] for m in result.models])
        all_ua_norm_rewards[j, :] = ua_pred_values / best_pfc
    for j, (true_pfc, result) in enumerate(zip(true_pfcs, ts_results)):
        best_pfc = np.max(true_pfc)
        ts_pred_values = np.array(
            [true_pfc[m.best_pred_ind] for m in result.models])
        all_ts_norm_rewards[j, :] = ts_pred_values / best_pfc

    # aggregate results wrt est
    all_ua_norm_est_rewards = np.zeros(
        [len(ua_results), len(ua_results[0].iters)])
    all_ts_norm_est_rewards = np.zeros(
        [len(ts_results), len(ts_results[0].iters)])
    for j, (est_pfc, result) in enumerate(zip(est_pfcs, ua_results)):
        best_pfc = np.max(est_pfc)
        ua_pred_values = np.array(
            [est_pfc[m.best_pred_ind] for m in result.models])
        all_ua_norm_est_rewards[j, :] = ua_pred_values / best_pfc
    for j, (est_pfc, result) in enumerate(zip(est_pfcs, ts_results)):
        best_pfc = np.max(est_pfc)
        ts_pred_values = np.array(
            [est_pfc[m.best_pred_ind] for m in result.models])
        all_ts_norm_est_rewards[j, :] = ts_pred_values / best_pfc

    # plot params
    line_width = 2.5
    font_size = 15

    # histogram of all arms
    all_true_pfcs = np.zeros(0)
    for true_pfc in true_pfcs:
        all_true_pfcs = np.r_[all_true_pfcs, true_pfc]
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(all_true_pfcs, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # visualize result wrt truth
    ua_avg_norm_reward = np.mean(all_ua_norm_rewards, axis=0)
    ts_avg_norm_reward = np.mean(all_ts_norm_rewards, axis=0)
    plt.figure()
    plt.plot(ua_results[0].iters,
             ua_avg_norm_reward,
             c=u'b',
             linewidth=line_width,
             label='Uniform Allocation')
    plt.plot(ts_results[0].iters,
             ts_avg_norm_reward,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling')
    plt.xlim(0, np.max(ts_results[0].iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)
    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')

    # visualize result wrt estimates
    ua_avg_norm_est_reward = np.mean(all_ua_norm_est_rewards, axis=0)
    ts_avg_norm_est_reward = np.mean(all_ts_norm_est_rewards, axis=0)
    plt.figure()
    plt.plot(ua_results[0].iters,
             ua_avg_norm_est_reward,
             c=u'b',
             linewidth=line_width,
             label='Uniform Allocation')
    plt.plot(ts_results[0].iters,
             ts_avg_norm_est_reward,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling')
    plt.xlim(0, np.max(ts_results[0].iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Estimated Normalized PFC vs Iteration',
              fontsize=font_size)
    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    # BUG FIX: the original `return result` depended on the loop variable
    # leaked from the final aggregation loop, i.e. the last Thompson result;
    # return that value explicitly.
    return ts_results[-1]
def discrete_maximize(self,
                      candidates,
                      termination_condition=tc.MaxIterTerminationCondition(
                          solvers.DEF_MAX_ITER),
                      snapshot_rate=1):
    """Maximizes a function over a discrete set of variables by
    iteratively predicting the best point (using some model policy).

    Params:
        candidates: list of candidate points to optimize over.
        termination_condition: callable(k, cur_val, prev_val, model)
            deciding when to stop.
        snapshot_rate: snapshot the model every `snapshot_rate` iters.

    Returns:
        AdaptiveSamplingResult holding the best candidate objects, their
        predicted means/variances, timing, and logged model snapshots.

    Raises:
        ValueError: on an empty candidate list or a non-DiscreteModel.
    """
    # check input
    if len(candidates) == 0:
        raise ValueError('No candidates specified')
    if not isinstance(self.model_, models.DiscreteModel):
        logging.error('Illegal model specified')
        raise ValueError(
            'Illegitimate model used in DiscreteAdaptiveSampler')

    # init vars
    terminate = False
    k = 0  # cur iter
    self.reset_model(candidates)  # update model with new candidates

    # logging buffers
    times = []
    iters = []
    iter_indices = []
    iter_vals = []
    iter_models = []
    start_time = time.clock()
    next_ind_val = 0
    while not terminate:
        # get next point to sample
        next_ind = self.selection_policy_.choose_next()

        # evaluate the function at the given point (can be nondeterministic)
        prev_ind_val = next_ind_val
        next_ind_val = self.objective_.evaluate(candidates[next_ind])

        # snapshot the model and whatnot
        if (k % snapshot_rate) == 0:
            checkpt = time.clock()
            times.append(checkpt - start_time)
            iters.append(k)
            iter_indices.append(next_ind)
            iter_vals.append(next_ind_val)
            iter_models.append(self.model_.snapshot())

        # update the model (e.g. posterior update, grasp pruning)
        self.model_.update(next_ind, next_ind_val)

        # check termination condition
        terminate = termination_condition(k,
                                          cur_val=next_ind_val,
                                          prev_val=prev_ind_val,
                                          model=self.model_)
        k = k + 1

    # log final values
    checkpt = time.clock()
    times.append(checkpt - start_time)
    iters.append(k)
    iter_indices.append(next_ind)
    iter_vals.append(next_ind_val)
    iter_models.append(self.model_.snapshot())

    # log total runtime
    end_time = time.clock()
    total_duration = end_time - start_time

    # log results and return
    best_indices, best_pred_means, best_pred_vars = \
        self.model_.max_prediction()
    # BUG FIX: previously this appended the raw indices into
    # best_candidates; callers (e.g. `result.best_candidates[0].p()`,
    # `[c.grasp for c in result.best_candidates]`) expect the candidate
    # objects themselves, so look them up.
    best_candidates = [candidates[ind] for ind in best_indices]
    return AdaptiveSamplingResult(best_candidates, best_pred_means,
                                  best_pred_vars, total_duration, times,
                                  iters, iter_indices, iter_vals,
                                  iter_models)
def label_correlated(obj, chunk, dest, config, plot=False):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Loads grasps and features from the database chunk, runs uniform
    allocation, Thompson sampling, and correlated Thompson sampling under
    the same termination conditions, saves transformed grasps to JSON in
    `dest`, and returns a BanditCorrelatedExperimentResult (or None when
    the object has no grasps). NOTE(review): `plot` is unused here.
    """
    bandit_start = time.clock()
    np.random.seed(100)

    # load grasps from database
    sample_start = time.clock()
    grasps = chunk.load_grasps(obj.key)
    sample_end = time.clock()
    sample_duration = sample_end - sample_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (sample_duration))
    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # load features for all grasps
    feature_start = time.clock()
    feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    all_features = feature_loader.load_all_features(
        grasps)  # in same order as grasps
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(all_features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # bandit params (NOTE(review): brute_force_iter / confidence are unused)
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        #        tc.ConfidenceTerminationCondition(confidence)
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV
    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            # only feature-bearing grasps become bandit candidates
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform used by the kernel / nearest-neighbor structure
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    if config['grasp_symmetry']:
        # symmetric variants also match against the finger-swapped features
        def swapped_phi(rv):
            return rv.swapped_features

        nn = kernels.SymmetricKDTree(phi=phi, alternate_phi=swapped_phi)
        kernel = kernels.SymmetricSquaredExponentialKernel(
            sigma=config['kernel_sigma'],
            l=config['kernel_l'],
            phi=phi,
            alternate_phi=swapped_phi)
    objective = objectives.RandomBinaryObjective()

    # pre-computed pfc values
    estimated_pfc = np.array([c.grasp.quality for c in candidates])

    # uniform allocation baseline
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation.')
    ua_result = ua.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # Thompson sampling for faster convergence
    ts = das.ThompsonSampling(objective, candidates)
    logging.info('Running Thompson sampling.')
    ts_result = ts.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # correlated Thompson sampling for even faster convergence
    ts_corr = das.CorrelatedThompsonSampling(
        objective, candidates, nn, kernel,
        tolerance=config['kernel_tolerance'])
    logging.info('Running correlated Thompson sampling.')
    ts_corr_result = ts_corr.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # NOTE(review): best_candidates is used as *indices* here, while other
    # call sites in this file treat it as candidate objects — verify
    # against the discrete_maximize implementation in use.
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' % (bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    #    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    if config['vis_grasps']:
        delay = config['vis_delay']
    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        #        rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        # replicate the source grasp's quality for every rotated copy
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))
    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([
            g.to_json(quality=q)
            for g, q in zip(pr2_grasps, pr2_grasp_qualities)
        ], f)

    # normalized reward curves for each strategy, against precomputed pfc
    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward,
                                            ts_normalized_reward,
                                            ts_corr_normalized_reward,
                                            estimated_pfc,
                                            ua_result.iters,
                                            kernel.matrix(candidates),
                                            obj_key=obj.key)
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure, using correlated bandits.

    Samples (or loads) grasp candidates for `obj`, estimates each grasp's
    probability of force closure (PFC), then compares uncorrelated vs.
    correlated Thompson sampling over `num_trials` trials and plots the
    averaged normalized rewards.

    Parameters:
        obj: graspable object (project type) providing .key and geometry.
        chunk: dataset chunk — NOTE(review): immediately clobbered below by
            `chunk = db.Chunk(config)`; confirm the caller's chunk is unused on purpose.
        dest: output directory used by load_grasps/save_grasps.
        config: experiment config dict (bandit, kernel and plot parameters).
        plot: pass-through visualization flag for the grasp samplers.
        load: if True, load previously saved grasps; if False, sample fresh
            grasps, brute-force their PFC, save, plot a histogram and
            terminate the process via exit(0).

    Returns:
        None (all results are shown via matplotlib / saved to disk).
    """
    bandit_start = time.clock()
    np.random.seed(100)  # fixed seed for reproducible experiments
    chunk = db.Chunk(config)  # NOTE(review): overwrites the `chunk` parameter
    if not load:
        # load grasps from database
        sample_start = time.clock()
        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)
            # pad with gaussian grasps
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=target_num_grasps,
                    check_collisions=config['check_collisions'],
                    vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj,
                check_collisions=config['check_collisions'],
                vis=plot,
                grasp_gen_mult=6)
        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))
        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None
    else:
        grasps = load_grasps(obj, dest)
        grasps = grasps[:20]  # cap at 20 grasps — presumably to speed debugging; TODO confirm
        # grasps = chunk.load_grasps(obj.key)
    # load features for all grasps
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    features = feature_extractor.compute_all_features(grasps)
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))
    # prune crappy grasps (those whose feature extraction failed)
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps
    # compute distances for debugging
    # NOTE(review): `distances` is filled but never read afterwards — dead work.
    distances = np.zeros([len(grasps), len(grasps)])
    i = 0
    for feature_i in all_features:
        j = 0
        for feature_j in all_features:
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)
            j += 1
        i += 1
    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]
    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV
    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
        candidates.append(pfc_rv)
    # feature transform used by the KD-tree and SE kernel below
    def phi(rv):
        return rv.features
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()
    if not load:
        # uniform allocation for true values (brute-force PFC estimates)
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)
        save_grasps(grasps, estimated_pfc, obj, dest)
        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']
        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success',
                  fontsize=font_size)
        plt.show()
        # NOTE(review): exit(0) terminates the whole process here — the
        # bandit comparison below only runs on the `load=True` path.
        exit(0)
    else:
        estimated_pfc = np.array([g.quality for g in grasps])
    # debugging for examining bad features
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    # grasp_i.close_fingers(obj, vis=True)
    # grasp_j.close_fingers(obj, vis=True)
    grasp_i.surface_information(obj, config['window_width'],
                                config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'],
                                config['window_steps'])
    # reshape the per-contact window features into w x w images for display
    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])
    a = 0.1
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(wi1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi1')
    plt.subplot(2, 2, 2)
    plt.imshow(wi2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi2')
    plt.subplot(2, 2, 3)
    plt.imshow(wj1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj1')
    plt.subplot(2, 2, 4)
    plt.imshow(wj2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj2')
    # plt.show()
    # IPython.embed()
    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []
    for t in range(num_trials):
        logging.info('Trial %d' % (t))
        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result,
                                                    estimated_pfc)
        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)
    # get the bandit rewards (trials x snapshots), averaged over trials
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)
    # get correlations and plot: kernel similarity vs pairwise PFC difference
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()
    bad_ind = np.where(pfc_diff > 1.0 - k)
    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    # plt.show()
    # IPython.embed()
    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']
    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)
    # plot the results
    plt.figure()
    plt.plot(ts_result.iters,
             avg_ts_rewards,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters,
             avg_ts_corr_rewards,
             c=u'r',
             linewidth=line_width,
             label='Thompson Sampling (Correlated)')
    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)
    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()
    IPython.embed()
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    # grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        # rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' %(len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in zip(pr2_grasps, pr2_grasp_qualities)], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward,
                                            ua_result, ts_result, ts_corr_result, obj_key=obj.key)
    """
    return None
def test_antipodal_grasp_thompson():
    """Smoke test: sample antipodal grasps on the Co_clean test object and
    compare uniform allocation vs. Thompson sampling for PFC estimation.

    Reads mesh/SDF fixtures from data/test/, runs both bandit policies with
    a fixed RNG seed, and shows comparison plots; no return value.
    """
    np.random.seed(100)  # deterministic grasp sampling / bandit draws
    # h = plt.figure()
    # ax = h.add_subplot(111, projection = '3d')
    # load object
    sdf_3d_file_name = 'data/test/sdf/Co_clean.sdf'
    sf = sdf_file.SdfFile(sdf_3d_file_name)
    sdf_3d = sf.read()
    mesh_name = 'data/test/meshes/Co_clean.obj'
    of = obj_file.ObjFile(mesh_name)
    m = of.read()
    graspable = go.GraspableObject3D(sdf_3d, mesh=m, model_name=mesh_name)
    # minimal inline config instead of a yaml experiment config
    config = {
        'grasp_width': 0.1,
        'friction_coef': 0.5,
        'num_cone_faces': 8,
        'grasp_samples_per_surface_point': 4,
        'dir_prior': 1.0,
        'alpha_thresh_div': 32,
        'rho_thresh': 0.75,  # as pct of object max moment
        'vis_antipodal': False,
        'min_num_grasps': 20,
        'alpha_inc': 1.1,
        'rho_inc': 1.1,
        'sigma_mu': 0.1,
        'sigma_trans_grasp': 0.001,
        'sigma_rot_grasp': 0.1,
        'sigma_trans_obj': 0.001,
        'sigma_rot_obj': 0.1,
        'sigma_scale_obj': 0.1,
        'num_prealloc_obj_samples': 100,
        'num_prealloc_grasp_samples': 0,
        'min_num_collision_free_grasps': 10,
        'grasp_theta_res': 1
    }
    sampler = ags.AntipodalGraspSampler(config)
    start_time = time.clock()
    grasps, alpha_thresh, rho_thresh = sampler.generate_grasps(graspable,
                                                               vis=False)
    end_time = time.clock()
    duration = end_time - start_time
    logging.info('Antipodal grasp candidate generation took %f sec' %
                 (duration))
    # convert grasps to RVs for optimization
    # NOTE(review): these names are unprefixed (no `pfc.`) unlike the rest of
    # the file — presumably imported directly at module level; verify.
    graspable_rv = GraspableObjectGaussianPose(graspable, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])
    candidates = []
    for grasp in grasps:
        grasp_rv = ParallelJawGraspGaussian(grasp, config)
        candidates.append(ForceClosureRV(grasp_rv, graspable_rv, f_rv,
                                         config))
    objective = objectives.RandomBinaryObjective()
    # run bandits
    eps = 5e-4
    ua_tc_list = [tc.MaxIterTerminationCondition(1000)
                  ]  #, tc.ConfidenceTerminationCondition(eps)]
    ua = das.UniformAllocationMean(objective, candidates)
    ua_result = ua.solve(
        termination_condition=tc.OrTerminationCondition(ua_tc_list),
        snapshot_rate=100)
    logging.info('Uniform allocation took %f sec' % (ua_result.total_time))
    ts_tc_list = [
        tc.MaxIterTerminationCondition(1000),
        tc.ConfidenceTerminationCondition(eps)
    ]
    ts = das.ThompsonSampling(objective, candidates)
    ts_result = ts.solve(
        termination_condition=tc.OrTerminationCondition(ts_tc_list),
        snapshot_rate=100)
    logging.info('Thompson sampling took %f sec' % (ts_result.total_time))
    # uniform-allocation estimates serve as ground truth for both plots
    true_means = models.BetaBernoulliModel.beta_mean(
        ua_result.models[-1].alphas, ua_result.models[-1].betas)
    # plot results
    plt.figure()
    plot_value_vs_time_beta_bernoulli(ua_result, true_means, color='red')
    plot_value_vs_time_beta_bernoulli(ts_result, true_means, color='blue')
    plt.show()
    das.plot_num_pulls_beta_bernoulli(ua_result)
    plt.title('Observations Per Variable for Uniform allocation')
    das.plot_num_pulls_beta_bernoulli(ts_result)
    plt.title('Observations Per Variable for Thompson sampling')
    plt.show()
def extract_features(obj, dest, feature_dest, config):
    """Sample grasps for `obj`, estimate their probability of force closure
    (PFC) by brute-force uniform allocation, and write grasps and features
    to JSON.

    Parameters:
        obj: graspable object (project type) providing .key and geometry.
        dest: output directory for the per-object grasp JSON file.
        feature_dest: output directory for the per-object feature JSON file.
        config: experiment config dict (sampler, bandit and collision params).

    Returns:
        None. Side effects: writes `<dest>/<obj.key>.json` and
        `<feature_dest>/<obj.key>.json`; returns early if no grasps sampled.
    """
    # sample grasps
    sample_start = time.clock()
    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])
        # pad with gaussian grasps if antipodal sampling came up short
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(
                obj,
                target_num_grasps=target_num_grasps,
                check_collisions=config['check_collisions'])
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])
    sample_end = time.clock()
    sample_duration = sample_end - sample_start
    logging.info('Grasp candidate generation took %f sec' % (sample_duration))
    # FIX: `not grasps` already covers the empty-list case; the extra
    # `len(grasps) == 0` test was redundant.
    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return
    # compute all features
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Feature extraction took %f sec' % (feature_duration))
    # generate pfc candidates; grasps whose feature extraction failed are dropped
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu'])
    candidates = []
    logging.info('%d grasps, %d valid features', len(grasps),
                 len(all_features) - all_features.count(None))
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d candidate' % (len(candidates)))
        if features is None:
            logging.info('No features computed.')
            continue
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)
    logging.info('%d candidates', len(candidates))
    # brute force with uniform allocation
    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    # FIX: removed dead local `def phi(rv)` — it was never referenced here
    # (no kernel / nearest-neighbor structure is built in this function).
    objective = objectives.RandomBinaryObjective()
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    bandit_start = time.clock()
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    bandit_end = time.clock()
    bandit_duration = bandit_end - bandit_start
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (brute_force_iter, bandit_duration))
    cand_grasps = [c.grasp for c in candidates]
    cand_features = [c.features_ for c in candidates]
    final_model = ua_result.models[-1]
    # posterior Beta mean = estimated PFC per candidate
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        final_model.alphas, final_model.betas)
    if len(cand_grasps) != len(estimated_pfc):
        logging.warning(
            'Number of grasps does not match estimated pfc results.')
        IPython.embed()  # drop into a shell for interactive debugging
    # write to file
    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as grasp_file:
        jsons.dump([
            g.to_json(quality=q, num_successes=a, num_failures=b)
            for g, q, a, b in zip(cand_grasps, estimated_pfc,
                                  final_model.alphas, final_model.betas)
        ], grasp_file)
    # HACK to make paths relative
    features_as_json = [f.to_json(feature_dest) for f in cand_features]
    output_dest = os.path.split(dest)[0]
    for feature_as_json in features_as_json:
        feature_as_json = list(feature_as_json.values())[0]
        for wname in ('w1', 'w2'):
            wdata = feature_as_json[wname]
            for k, v in wdata.items():
                wdata[k] = os.path.relpath(
                    v, output_dest)  # relative to output_dest
    feature_filename = os.path.join(feature_dest, obj.key + '.json')
    with open(feature_filename, 'w') as feature_file:
        jsons.dump(features_as_json, feature_file)
def eval_hyperparams(obj,
                     chunk,
                     config,
                     plot=False,
                     priors_dataset=None,
                     nearest_features_names=None):
    """Evaluate prior-computation hyperparameters by scoring CCBP priors
    against pre-computed ("true") PFC values for an object's grasps.

    Parameters:
        obj: graspable object (project type) providing .key.
        chunk: dataset chunk used to load candidate grasps (and as the
            default priors dataset).
        config: experiment config dict (bandit, kernel and prior params).
        plot: unused here; kept for interface compatibility with callers.
        priors_dataset: dataset to compute priors from; defaults to `chunk`.
        nearest_features_names: optional list of nearest-feature set names;
            when None a single prior is computed without neighbor lookup.

    Returns:
        HyperparamEvalResult with cross-entropy / squared-error / weighted-SE
        / CCBP log-likelihood values, or None if no candidates are available.
    """
    # bandit params (several of these are only used by sibling experiment
    # functions; kept for parity with them)
    num_trials = config['num_trials']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]
    bandit_start = time.clock()
    np.random.seed(100)  # deterministic candidate loading / sampling
    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None
    # feature transform for the kernel / nearest-neighbor structures
    def phi(rv):
        return rv.features
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()
    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)
    # Compute priors
    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # FIX: neighbor_keys was only bound inside the else-branch below, so the
    # `nearest_features_names is None` path raised NameError at the return
    # statement (`neighbor_keys=neighbor_keys`). Initialize it up front.
    neighbor_keys = []
    if nearest_features_names is None:  # FIX: `is None`, not `== None`
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' %
                         (nearest_features_name))
            priors_start_time = time.time()
            alpha_priors, beta_priors, neighbor_keys, neighbor_distances, neighbor_kernels, neighbor_pfc_diffs, num_grasp_neighbors = \
                prior_engine.compute_priors(obj, candidates, nearest_features_name=nearest_features_name)
            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name,
                 priors_end_time - priors_start_time))
    # pre-computed pfc values act as regression ground truth
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)  # uninformed Beta(1,1) mean
    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]
    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            np.array(alpha_prior), np.array(beta_prior))
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            np.array(alpha_prior), np.array(beta_prior))
        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(
            ccbp_ll.evaluate(np.array(alpha_prior), np.array(beta_prior)))
        total_weights.append(np.sum(estimated_vars))
    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)
    # create hyperparam dict describing this evaluation's configuration
    num_grasps = len(candidates)
    hyperparams = {}
    hyperparams['weight_grad'] = config['weight_grad_x']
    hyperparams['weight_moment'] = config['weight_gravity']
    hyperparams['weight_shape'] = config['prior_neighbor_weight']
    hyperparams['num_neighbors'] = config['prior_num_neighbors']
    return HyperparamEvalResult(ce_vals,
                                se_vals,
                                we_vals,
                                ccbp_vals,
                                num_grasps,
                                total_weights,
                                hyperparams,
                                prior_comp_times,
                                obj_key=obj.key,
                                neighbor_keys=neighbor_keys)
def test_window_correlation(width, num_steps, vis=True):
    """Smoke test: check kernel correlation of surface-window features for a
    small set of hand-placed parallel-jaw grasps on the Co_clean object.

    Parameters:
        width: projection window width written into the config.
        num_steps: projection window resolution written into the config.
        vis: if True, show friction cones per grasp and block on plt.show().

    Returns:
        None; prints the brute-forced PFC values and the kernel matrix.
    """
    import scipy
    import sdf_file, obj_file
    import discrete_adaptive_samplers as das
    import experiment_config as ec
    import feature_functions as ff
    import graspable_object as go  # weird Python issues
    import kernels
    import models
    import objectives
    import pfc
    import termination_conditions as tc
    np.random.seed(100)  # deterministic sampling for reproducible output
    mesh_file_name = 'data/test/meshes/Co_clean.obj'
    sdf_3d_file_name = 'data/test/sdf/Co_clean.sdf'
    config = ec.ExperimentConfig('cfg/correlated.yaml')
    config['window_width'] = width
    config['window_steps'] = num_steps
    brute_force_iter = 100
    snapshot_rate = config['bandit_snapshot_rate']
    sdf = sdf_file.SdfFile(sdf_3d_file_name).read()
    mesh = obj_file.ObjFile(mesh_file_name).read()
    graspable = go.GraspableObject3D(sdf, mesh)
    grasp_axis = np.array([0, 1, 0])
    grasp_width = 0.1
    grasps = []
    for z in [-0.030, -0.035, -0.040, -0.045]:
        grasp_center = np.array([0, 0, z])
        # FIX: configuration_from_params was called without the `g.` module
        # prefix (it is not among the local imports above), which raises
        # NameError; qualify it like the enclosing constructor call.
        grasp = g.ParallelJawPtGrasp3D(
            g.ParallelJawPtGrasp3D.configuration_from_params(
                grasp_center, grasp_axis, grasp_width))
        grasps.append(grasp)
    graspable_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV
    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(graspable, config)
    all_features = feature_extractor.compute_all_features(grasps)
    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)
        if vis:
            _, (c1, c2) = grasp.close_fingers(graspable)
            plt.figure()
            c1_proxy = c1.plot_friction_cone(color='m')
            c2_proxy = c2.plot_friction_cone(color='y')
            plt.legend([c1_proxy, c2_proxy], ['Cone 1', 'Cone 2'])
            plt.title('Grasp %d' % (len(candidates)))
    objective = objectives.RandomBinaryObjective()
    # brute-force the "true" PFC with uniform allocation
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        ua_result.models[-1].alphas, ua_result.models[-1].betas)
    # single-argument print() is identical in Python 2 and 3
    print('true pfc')
    print(estimated_pfc)
    def phi(rv):
        return rv.features
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    print('kernel matrix')
    print(kernel.matrix(candidates))
    if vis:
        plt.show()
def label_correlated(obj,
                     chunk,
                     config,
                     plot=False,
                     priors_dataset=None,
                     nearest_features_names=None):
    """Label an object with grasps according to probability of force closure, using correlated bandits.

    Compares uniform allocation, Thompson sampling, Gittins indices,
    correlated Thompson sampling, correlated Bayes UCB, and the prior-seeded
    correlated variants over `num_trials` trials, scoring each against
    pre-computed ("true") PFC values.

    Parameters:
        obj: graspable object (project type) providing .key.
        chunk: dataset chunk used for candidate loading / default priors.
        config: experiment config dict (bandit, kernel and prior params).
        plot: unused here; kept for interface compatibility with callers.
        priors_dataset: dataset to compute priors from; defaults to `chunk`.
        nearest_features_names: optional list of nearest-feature set names.

    Returns:
        BanditCorrelatedPriorExperimentResult, or None if no candidates or
        num_trials == 0.

    NOTE(review): when nearest_features_names is None, `neighbor_keys`
    (referenced in the return statement) is never bound — NameError.
    """
    # bandit params
    num_trials = config['num_trials']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]
    bandit_start = time.clock()
    np.random.seed(100)  # deterministic experiments
    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None
    # feature transform for the kernel / nearest-neighbor structures
    def phi(rv):
        return rv.features
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()
    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)
    # Compute priors
    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    if nearest_features_names == None:
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' %
                         (nearest_features_name))
            priors_start_time = time.time()
            alpha_priors, beta_priors, neighbor_keys, neighbor_distances, neighbor_kernels, neighbor_pfc_diffs, num_grasp_neighbors = \
                prior_engine.compute_priors(obj, candidates, nearest_features_name=nearest_features_name)
            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name,
                 priors_end_time - priors_start_time))
    # pre-computed pfc values act as regression ground truth
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)  # uninformed Beta(1,1) mean
    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]
    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            np.array(alpha_prior), np.array(beta_prior))
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            np.array(alpha_prior), np.array(beta_prior))
        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(
            ccbp_ll.evaluate(np.array(alpha_prior), np.array(beta_prior)))
        total_weights.append(np.sum(estimated_vars))
    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)
    # setup reward buffers (one inner list per prior set)
    ua_rewards = []
    ts_rewards = []
    gi_rewards = []
    ts_corr_rewards = []
    bucb_corr_rewards = []
    all_ts_corr_prior_rewards = []
    for x in range(0, len(all_alpha_priors)):
        all_ts_corr_prior_rewards.append([])
    all_bucb_corr_prior_rewards = []
    for x in range(0, len(all_alpha_priors)):
        all_bucb_corr_prior_rewards.append([])
    # setup runtime buffers
    ua_runtimes = []
    ts_runtimes = []
    gi_runtimes = []
    ts_corr_runtimes = []
    bucb_corr_runtimes = []
    all_ts_corr_prior_runtimes = []
    for x in range(0, len(all_alpha_priors)):
        all_ts_corr_prior_runtimes.append([])
    all_bucb_corr_prior_runtimes = []
    for x in range(0, len(all_alpha_priors)):
        all_bucb_corr_prior_runtimes.append([])
    # run bandits for several trials
    logging.info('Running bandits')
    for t in range(num_trials):
        logging.info('Trial %d' % (t))
        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # Gittins indices
        gi = das.GittinsIndex98(objective, candidates)
        logging.info('Running Gittins Indices.')
        gi_result = gi.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # correlated Thompson sampling for even faster convergence
        bucb_corr = das.CorrelatedGittins(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])  #horizon=max_iter)
        logging.info('Running correlated Bayes UCB.')
        bucb_corr_result = bucb_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # correlated MAB for faster convergence
        # NOTE(review): the loop variables ts_corr_runtimes / bucb_corr_runtimes
        # below SHADOW the outer runtime lists of the same names, so the
        # `.total_time` appends after this loop go into the LAST prior's inner
        # list instead of the outer per-trial lists — likely a bug.
        all_ts_corr_prior_ind = []
        all_bucb_corr_prior_ind = []
        for alpha_priors, beta_priors, ts_corr_prior_rewards, bucb_corr_prior_rewards, ts_corr_runtimes, bucb_corr_runtimes, nearest_features_name in \
                zip(all_alpha_priors, all_beta_priors, all_ts_corr_prior_rewards, all_bucb_corr_prior_rewards, all_ts_corr_prior_runtimes, all_bucb_corr_prior_runtimes, nearest_features_names):
            # thompson sampling
            ts_corr_prior = das.CorrelatedThompsonSampling(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info(
                'Running correlated Thompson sampling with priors from %s' %
                (nearest_features_name))
            ts_corr_prior_result = ts_corr_prior.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            ts_corr_prior_normalized_reward = reward_vs_iters(
                ts_corr_prior_result, true_pfc)
            ts_corr_prior_rewards.append(ts_corr_prior_normalized_reward)
            ts_corr_runtimes.append(ts_corr_prior_result.total_time)
            all_ts_corr_prior_ind.append(ts_corr_prior_result.best_pred_ind)
            # bayes ucb
            bucb_corr = das.CorrelatedGittins(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],
                #horizon=max_iter,
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info('Running correlated Bayes UCB with priors from %s' %
                         (nearest_features_name))
            bucb_corr_prior_result = bucb_corr.solve(
                termination_condition=tc.OrTerminationCondition(tc_list),
                snapshot_rate=snapshot_rate)
            bucb_corr_prior_normalized_reward = reward_vs_iters(
                bucb_corr_prior_result, true_pfc)
            bucb_corr_prior_rewards.append(bucb_corr_prior_normalized_reward)
            bucb_corr_runtimes.append(bucb_corr_prior_result.total_time)
            all_bucb_corr_prior_ind.append(
                bucb_corr_prior_result.best_pred_ind)
        # compile results
        ua_normalized_reward = reward_vs_iters(ua_result, true_pfc)
        ts_normalized_reward = reward_vs_iters(ts_result, true_pfc)
        gi_normalized_reward = reward_vs_iters(gi_result, true_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, true_pfc)
        bucb_corr_normalized_reward = reward_vs_iters(bucb_corr_result,
                                                      true_pfc)
        ua_rewards.append(ua_normalized_reward)
        ts_rewards.append(ts_normalized_reward)
        gi_rewards.append(gi_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)
        bucb_corr_rewards.append(bucb_corr_normalized_reward)
        ua_runtimes.append(ua_result.total_time)
        ts_runtimes.append(ts_result.total_time)
        gi_runtimes.append(gi_result.total_time)
        ts_corr_runtimes.append(ts_corr_result.total_time)
        bucb_corr_runtimes.append(bucb_corr_result.total_time)
    if num_trials == 0:
        return None
    # get the bandit rewards
    all_ua_rewards = np.array(ua_rewards)
    all_ts_rewards = np.array(ts_rewards)
    all_gi_rewards = np.array(gi_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    all_bucb_corr_rewards = np.array(bucb_corr_rewards)
    all_avg_ts_corr_prior_rewards = []
    for ts_corr_prior_rewards in all_ts_corr_prior_rewards:
        all_avg_ts_corr_prior_rewards.append(
            np.mean(np.array(ts_corr_prior_rewards), axis=0))
    all_avg_bucb_corr_prior_rewards = []
    for bucb_corr_prior_rewards in all_bucb_corr_prior_rewards:
        all_avg_bucb_corr_prior_rewards.append(
            np.mean(np.array(bucb_corr_prior_rewards), axis=0))
    #all_avg_bucb_corr_prior_rewards.append([])
    # get bandit indices (best predicted candidate from the LAST trial)
    ua_ind = ua_result.best_pred_ind
    ts_ind = ts_result.best_pred_ind
    ts_corr_ind = ts_corr_result.best_pred_ind
    bucb_corr_ind = bucb_corr_result.best_pred_ind
    # compute avg normalized rewards across trials
    avg_ua_rewards = np.mean(all_ua_rewards, axis=0)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_gi_rewards = np.mean(all_gi_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)
    avg_bucb_corr_rewards = np.mean(all_bucb_corr_rewards, axis=0)
    #avg_bucb_corr_rewards = all_bucb_corr_rewards
    # compute avg runtimes
    avg_ua_runtimes = np.mean(np.array(ua_runtimes), axis=0)
    avg_ts_runtimes = np.mean(np.array(ts_runtimes), axis=0)
    avg_ts_corr_runtimes = np.mean(np.array(ts_corr_runtimes), axis=0)
    avg_bucb_corr_runtimes = np.mean(np.array(bucb_corr_runtimes), axis=0)
    all_avg_ts_corr_prior_runtimes = []
    for ts_corr_prior_runtimes in all_ts_corr_prior_runtimes:
        all_avg_ts_corr_prior_runtimes.append(
            np.mean(np.array(ts_corr_prior_runtimes), axis=0))
    all_avg_bucb_corr_prior_runtimes = []
    for bucb_corr_prior_runtimes in all_bucb_corr_prior_runtimes:
        all_avg_bucb_corr_prior_runtimes.append(
            np.mean(np.array(bucb_corr_prior_runtimes), axis=0))
    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)
    return BanditCorrelatedPriorExperimentResult(
        avg_ua_rewards,
        avg_ts_rewards,
        avg_gi_rewards,
        avg_ts_corr_rewards,
        avg_bucb_corr_rewards,
        all_avg_ts_corr_prior_rewards,
        all_avg_bucb_corr_prior_rewards,
        true_pfc,
        ua_result.iters,
        kernel_matrix, [], [], [],
        ce_vals,
        ccbp_vals,
        we_vals,
        len(candidates),
        total_weights,
        ua_ind,
        ts_ind,
        ts_corr_ind,
        bucb_corr_ind,
        all_ts_corr_prior_ind,
        all_bucb_corr_prior_ind,
        avg_ua_runtimes,
        avg_ts_runtimes,
        avg_ts_corr_runtimes,
        avg_bucb_corr_runtimes,
        all_avg_ts_corr_prior_runtimes,
        all_avg_bucb_corr_prior_runtimes,
        prior_comp_times,
        obj_key=obj.key,
        neighbor_keys=neighbor_keys)
# Script fragment: fit per-window feature weights by projected gradient
# descent on a stochastic grasp-weight objective.
grasps, data = load_data('grasp_features.hdf5', config)
# convert Beta posterior counts to observed trial counts
successes = np.array([g.successes for g in grasps]) - 1  # subtract alpha0
failures = np.array([g.failures for g in grasps]) - 1  # subtract beta0
loss = StochasticGraspWeightObjective(data, successes, failures, config)
objective = MinimizationObjective(loss)  # wrap loss so the solver minimizes it
step_policy = ilo.LogStepPolicy(config['step_size_max'],
                                config['step_size_period'])
# projection onto the nonnegative orthant (weights must stay >= 0);
# mutates x in place and returns it
def positive_constraint(x):
    x[x < 0] = 0
    return x
optimizer = ilo.ConstrainedGradientAscent(objective, step_policy,
                                          [positive_constraint])
# two w x w windows flattened: 2 * window_steps^2 initial weights
start = config['weight_initial'] * np.ones(2 * config['window_steps']**2)
logging.info('Starting optimization.')
result = optimizer.solve(
    termination_condition=tc.MaxIterTerminationCondition(config['max_iters']),
    snapshot_rate=config['snapshot_rate'],
    start_x=start,
    true_x=None)
proj_win_weight = result.best_x
max_weight = np.max(proj_win_weight)
# reshape flat weight vectors back into (2, w, w) window images
opt_weights = proj_win_weight.reshape(
    (2, config['window_steps'], config['window_steps']))
rand_weights = start.reshape(
    (2, config['window_steps'], config['window_steps']))
logging.info('Loss: %f to %f, delta=%f', loss(start), loss(result.best_x),
             np.linalg.norm(start - result.best_x))
# debugging stuff
def min_and_max(arr):
    return np.min(arr), np.max(arr)
ground_truth = loss.mu_
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Samples grasp candidates for ``obj``, estimates each grasp's probability
    of force closure (PFC) by brute-force uniform allocation, saves the
    labeled grasps, then benchmarks bandit algorithms (uniform allocation,
    Thompson sampling, correlated Thompson sampling) against the brute-force
    estimates over ``config['num_trials']`` trials.

    Parameters
    ----------
    obj : graspable object to label (must expose ``.key``)
    chunk : dataset chunk containing ``obj`` (unused in this function; kept
        for the labeling-callback signature)
    dest : destination passed through to ``save_grasps``
    config : experiment configuration mapping
    plot : bool, whether to visualize sampled grasps
    load : bool, unused; kept for backward compatibility

    Returns
    -------
    BanditCorrelatedExperimentResult with average normalized rewards per
    algorithm, the brute-force PFC estimates, and the candidate kernel
    matrix; or None if no grasps could be sampled.
    """
    #np.random.seed(100)

    # sample grasps
    # NOTE(review): time.clock() replaced with time.time() throughout --
    # time.clock() measured CPU time on Unix (not wall time) and was removed
    # in Python 3.8; these timers report wall-clock durations.
    sample_start = time.time()
    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(obj, check_collisions=config['check_collisions'], vis=False)

        # pad with gaussian grasps if antipodal sampling came up short
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(obj, target_num_grasps=target_num_grasps,
                                                               check_collisions=config['check_collisions'],
                                                               vis=plot)
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(obj, check_collisions=config['check_collisions'], vis=plot,
                                         grasp_gen_mult=6)
    sample_duration = time.time() - sample_start
    logging.info('Loaded %d grasps' %(len(grasps)))
    logging.info('Grasp candidate loading took %f sec' %(sample_duration))

    if not grasps:
        logging.info('Skipping %s' %(obj.key))
        return None

    # extract features for all grasps
    feature_start = time.time()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)
    feature_duration = time.time() - feature_start
    logging.info('Loaded %d features' %(len(all_features)))
    logging.info('Grasp feature loading took %f sec' %(feature_duration))

    # bandit params
    num_trials = config['num_trials']
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    brute_snapshot_rate = config['bandit_brute_force_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # set up random variables modeling pose / friction uncertainty
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu']) # friction Gaussian RV

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' %len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            # grasps without features cannot be used with the correlated
            # kernels below, so they are not added as candidates
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    # nearest-neighbor struct and squared-exponential kernel over features
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    # brute-force uniform allocation to estimate "true" pfc values
    ua_brute = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_brute_result = ua_brute.solve(termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
                                     snapshot_rate=brute_snapshot_rate)

    # estimated pfc is the posterior Beta mean after brute-force sampling
    final_model = ua_brute_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas, final_model.betas)
    save_grasps(grasps, estimated_pfc, obj, dest,
                num_successes=final_model.alphas, num_failures=final_model.betas)

    # run bandits for several trials
    ua_rewards = []
    ts_rewards = []
    ts_corr_rewards = []
    for t in range(num_trials):
        logging.info('Trial %d' %(t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = ua.solve(termination_condition=tc.OrTerminationCondition(tc_list),
                             snapshot_rate=snapshot_rate)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(termination_condition=tc.OrTerminationCondition(tc_list),
                             snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel, tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(termination_condition=tc.OrTerminationCondition(tc_list),
                                       snapshot_rate=snapshot_rate)

        # normalize each run's rewards against the brute-force pfc estimates
        ua_rewards.append(reward_vs_iters(ua_result, estimated_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, estimated_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, estimated_pfc))

    # compute avg normalized rewards across trials
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedExperimentResult(avg_ua_rewards, avg_ts_rewards, avg_ts_corr_rewards,
                                            estimated_pfc, ua_result.iters, kernel_matrix,
                                            obj_key=obj.key)