def run_greedy_ipp(self, num_runs=10, criterion='entropy', strategy='MaxEnt', disp=True):
    # greedy variant of the informative path planner: visit the selected
    # waypoints one at a time, always moving to the nearest remaining waypoint
    self._setup_ipp(criterion)
    for i in range(num_runs):
        if disp:
            print('\n==================================================================================================')
            print('Run {}/{}'.format(i+1, num_runs))
        run_start = time.time()

        # greedily select static samples
        new_gp_indices = self.greedy(self.num_samples_per_batch)
        waypoints = [tuple(self.env.gp_index_to_map_pose(x)) for x in new_gp_indices]
        next_static_locations = np.stack(waypoints)
        self.static_locations = np.concatenate([self.static_locations, next_static_locations]).astype(int)

        # Gather data along path
        if disp:
            print('------ Finding valid paths ---------')
            print('Pose:', self.pose, 'Heading:', self.heading, 'Waypoints:', waypoints)

        # move to the nearest waypoint
        costs, seq = self.env.map.nearest_waypoint_path_cost(self.pose, self.heading, waypoints, return_seq=True)
        # NOTE: use a separate loop variable here so the outer run counter `i` is not shadowed
        for j in range(len(seq)):
            paths_checkpoints, paths_indices, paths_cost = self.env.get_all_paths(
                self.pose, self.heading, [waypoints[seq[j]]], costs[j], slack=0)
            assert costs[j] == paths_cost[0], 'path costs do not match'

            # find optimal path
            if strategy == 'Shortest':
                best_idx = find_shortest_path(paths_cost)
            else:
                best_idx = self.best_path(paths_indices, [new_gp_indices[seq[j]]])
                if strategy == 'Equi-Sample':
                    best_idx = find_equi_sample_path(paths_indices, best_idx)

            next_path = np.stack(self.env.get_path_from_checkpoints(paths_checkpoints[best_idx]))[1:]
            next_path_indices, stds = self.get_samples_sequence_from_path(next_path, waypoints)
            self.path = np.concatenate([self.path, next_path], axis=0).astype(int)
            self.pose = tuple(self.path[-1])
            self.heading = get_heading(self.path[-2], self.path[-1])

            # gather samples
            self._add_samples(next_path_indices, stds)

        run_end = time.time()
        if disp:
            print('\nTotal Time consumed in run {}: {:.4f}'.format(i+1, run_end - run_start))

    pred, var = self.predict(return_var=True)
    error = compute_mae(self.env.test_Y, pred)
    print('==========================================================')
    print('Strategy: {:s}'.format(strategy))
    print('--- Final statistics ---')
    print('Test ERROR: {:.4f}'.format(error))
    print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
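# The helper find_shortest_path used above (and in run_ipp below) is defined
# elsewhere in the repo. Given how it is called with the list of candidate path
# costs under the 'Shortest' strategy, it presumably returns the index of the
# minimum-cost path; a minimal sketch under that assumption (the name is
# hypothetical, not the repo's actual implementation). find_equi_sample_path is
# repo-specific and is not sketched here.
import numpy as np

def find_shortest_path_sketch(paths_cost):
    # index of the cheapest feasible path among the candidates
    return int(np.argmin(paths_cost))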
def learn(self, iterations, predict_every=5):
    # notation:
    # X     - sampled set
    # X_{t} - sampled set of type t
    # V_{t} - entire sampling set of type t
    all_mae = []
    # greedily select the sample with the highest conditional utility
    for j in range(iterations):
        utilities = np.full(self.env.num_samples, -np.inf)
        indices = np.arange(self.env.num_samples)

        # all remaining (unsampled) locations
        rem = indices[~self.sampled]
        x = self.env.X[rem]
        t = self.env.ind[rem]
        mu, var = self.gp.predict(x, t, return_var=True)
        ent = entropy_from_var(var)

        # all remaining locations of the target type
        rem_t = indices[~self.sampled & (self.env.ind == self.env.target_task)]
        x_t = self.env.X[rem_t]
        t_t = np.full(len(x_t), self.env.target_task)
        mu_t, var_t = self.sec_gp.predict(x_t, t_t, return_var=True)
        ent_t = entropy_from_var(var_t)

        utilities[rem] = ent
        utilities[rem_t] -= ent_t
        best_idx = np.argmax(utilities)
        self.sampled[best_idx] = True

        # modify train data of main gp
        self.update_model(gp='main')
        # modify train data of secondary gp if the chosen type differs from the target type
        if self.env.ind[best_idx] != self.env.target_task:
            self.update_model(gp='secondary')

        if (j + 1) % predict_every == 0:
            mu = self.gp.predict(self.env.test_X, self.env.test_ind)
            mae = compute_mae(self.env.test_Y, mu)
            all_mae.append(mae)
            print('Iteration: {:d}/{:d} Test MAE: {:.3f}'.format(j + 1, iterations, mae))
    return all_mae
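# entropy_from_var is imported from the repo's utilities and is not shown in this
# section. A minimal sketch under the standard assumption that the predictive
# marginals are Gaussian, in which case the differential entropy of each marginal
# is 0.5 * log(2*pi*e*var) (the name with the _sketch suffix is hypothetical):
import numpy as np

def entropy_from_var_sketch(var):
    # elementwise differential entropy of independent Gaussian marginals
    return 0.5 * np.log(2 * np.pi * np.e * var)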
def prediction_vs_distance(self, test_every, num_runs):
    # evaluate test-set predictions after every `test_every` units of distance travelled
    count = 0
    all_error = []
    all_mi = []
    all_var = []
    while count < test_every * num_runs:
        count += test_every
        inds = np.array(self.collected['ind'][:count])
        valid = inds != -1
        x = self.env.X[inds[valid]]
        var = np.array(self.collected['std'])[:count][valid] ** 2
        y = np.array(self.collected['y'])[:count][valid]
        mu, cov, mi = predictive_distribution(self.gp, x, y, self.env.test_X, var,
                                              return_mi=True, return_cov=True)
        error = compute_mae(self.env.test_Y, mu)
        all_error.append(error)
        all_mi.append(mi)
        all_var.append(np.diag(cov).mean())
    results = {'mean': mu, 'error': all_error, 'mi': all_mi, 'mean_var': all_var}
    return results
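# predictive_distribution is a repo utility. A minimal sketch of the quantities it
# likely returns, assuming a zero-mean GP with a kernel callable and per-sample
# heteroscedastic noise variances; the simplified signature and the _sketch name
# are assumptions for illustration, not the repo's actual API:
import numpy as np

def predictive_distribution_sketch(kernel, x, y, x_test, noise_var):
    K = kernel(x, x) + np.diag(noise_var)   # noisy train covariance
    Ks = kernel(x_test, x)                  # test/train cross-covariance
    Kss = kernel(x_test, x_test)            # test covariance
    L = np.linalg.cholesky(K)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
    mu = Ks @ alpha                         # posterior mean
    v = np.linalg.solve(L, Ks.T)
    cov = Kss - v.T @ v                     # posterior covariance
    # mutual information between test values and observations:
    # I = H(prior) - H(posterior) = 0.5 * (logdet(Kss) - logdet(cov))
    mi = 0.5 * (np.linalg.slogdet(Kss)[1] - np.linalg.slogdet(cov)[1])
    return mu, cov, mi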
def compare_all_strategies(args):
    # compare all 5 strategies on the same environment
    strategies = ['MaxEnt', 'Shortest', 'Equi-Sample', 'Naive Static', 'Naive Mobile']
    ipp_strategies = ['MaxEnt', 'Shortest', 'Equi-Sample']
    naive_strategies = ['Naive Static', 'Naive Mobile']
    num_strategies = len(strategies)

    nsims = 10
    test_every = 10
    num_naive_runs = 20
    max_dist = test_every * num_naive_runs
    disp = False
    # set some initial samples
    initial_samples = 5

    error_results = [[] for _ in range(num_strategies)]
    mi_results = [[] for _ in range(num_strategies)]
    var_results = [[] for _ in range(num_strategies)]
    sample_count = [[] for _ in range(num_strategies)]
    noise_ratio = 5

    for t in range(nsims):
        env = FieldEnv(data_file=args.data_file, phenotype=args.phenotype, num_test=args.num_test)
        master = Agent(env, args, static_std=args.static_std)
        master.reset()
        master.pilot_survey(num_samples=initial_samples, std=master.static_std)
        mu, cov, zero_mi = master.predict(x=env.test_X, return_cov=True, return_mi=True)
        zero_error = compute_mae(mu, env.test_Y)
        zero_mean_var = np.diag(cov).mean()

        # it is not necessary to make separate agents, but it is useful for debugging
        agents = [Agent(env, args, parent_agent=master, static_std=args.static_std,
                        mobile_std=noise_ratio * args.static_std) for _ in range(num_strategies)]

        for i in range(num_strategies):
            if strategies[i] in ipp_strategies:
                # res = agents[i].run_ipp(num_runs=args.num_runs, strategy=strategies[i], disp=disp)
                agents[i].run_greedy_ipp(num_runs=args.num_runs, strategy=strategies[i], disp=disp)
                res = agents[i].prediction_vs_distance(test_every=test_every, num_runs=num_naive_runs)
            elif strategies[i] in naive_strategies:
                std = master.static_std if 'Static' in strategies[i] else master.mobile_std
                res = agents[i].run_naive(std=std, counts=[test_every] * num_naive_runs, metric='distance')
            else:
                raise NotImplementedError

            error_results[i].append([zero_error] + res['error'])
            mi_results[i].append([zero_mi] + res['mi'])
            var_results[i].append([zero_mean_var] + res['mean_var'])
            sample_count[i].append(path_to_sample_count(env, agents[i].path)[:max_dist])

    start = test_every
    x = [initial_samples] + list(np.arange(start, start + test_every * num_naive_runs, test_every))
    # x = np.stack([x for _ in range(nsims)]).flatten()
    x = np.tile(x, nsims)
    xlabel = 'Distance travelled'
    ci = 50

    # test error
    errors = [np.stack(res).flatten() for res in error_results]
    dct_err = {'x': x}
    for y, lbl in zip(errors, strategies):
        dct_err[lbl] = y
    df_err = pd.DataFrame.from_dict(dct_err)
    ylabel = 'Test MAE'
    generate_lineplots(df_err, x='x', xlabel=xlabel, ylabel=ylabel, legends=strategies, ci=ci)

    # sample count vs distance
    all_sample_count = [np.stack(sc).flatten() for sc in sample_count]
    dist = np.tile(np.arange(1, 1 + max_dist), nsims)
    dct_sc = {'x': dist}
    for y, lbl in zip(all_sample_count, strategies):
        dct_sc[lbl] = y
    df_sc = pd.DataFrame.from_dict(dct_sc)
    ylabel_sc = 'Number of samples'
    generate_lineplots(df_sc, x='x', xlabel=xlabel, ylabel=ylabel_sc, legends=strategies, ci=ci)
    # drop into the debugger to inspect results interactively
    ipdb.set_trace()
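# generate_lineplots is a plotting utility defined elsewhere in the repo. Given
# the DataFrame layout built above (one 'x' column plus one column per strategy)
# and the ci argument, a plausible seaborn-based sketch (an assumption about the
# implementation, using seaborn >= 0.12's errorbar API):
import matplotlib.pyplot as plt
import seaborn as sns

def generate_lineplots_sketch(df, x, xlabel, ylabel, legends, ci):
    # one line per strategy, with a ci% confidence band across repeated x values
    for lbl in legends:
        sns.lineplot(data=df, x=x, y=lbl, errorbar=('ci', ci), label=lbl)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()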
def compare_maxent(args):
    # compare the MaxEnt strategy across different slack values
    nsims = 10
    test_every = 10
    num_naive_runs = 25
    disp = False
    # set some initial samples
    initial_samples = 5

    # noise_ratios = [1, 2, 5, 10]
    # variants = ['noise ratio = ' + str(n) for n in noise_ratios]
    slacks = [0, 5, 10, 15]
    variants = ['slack = ' + str(s) for s in slacks]
    nv = len(variants)

    error_results = [[] for _ in range(nv)]
    mi_results = [[] for _ in range(nv)]
    var_results = [[] for _ in range(nv)]

    for t in range(nsims):
        env = FieldEnv(data_file=args.data_file, phenotype=args.phenotype, num_test=args.num_test)
        master = Agent(env, args, static_std=args.static_std)
        master.reset()
        master.pilot_survey(num_samples=initial_samples, std=master.static_std)
        mu, cov, zero_mi = master.predict(x=env.test_X, return_cov=True, return_mi=True)
        zero_error = compute_mae(mu, env.test_Y)
        zero_mean_var = np.diag(cov).mean()

        # it is not necessary to make separate agents, but it is useful for debugging
        # agents = [Agent(env, args, parent_agent=master, static_std=args.static_std,
        #                 mobile_std=kappa * args.static_std) for kappa in noise_ratios]
        agents = [Agent(env, args, parent_agent=master, static_std=args.static_std,
                        mobile_std=5 * args.static_std) for _ in range(nv)]

        for i in range(nv):
            agents[i].run_ipp(num_runs=args.num_runs, strategy='MaxEnt', disp=disp, slack=slacks[i])
            res = agents[i].prediction_vs_distance(test_every=test_every, num_runs=num_naive_runs)
            error_results[i].append([zero_error] + res['error'])
            mi_results[i].append([zero_mi] + res['mi'])
            var_results[i].append([zero_mean_var] + res['mean_var'])

    start = test_every
    x = [initial_samples] + list(np.arange(start, start + test_every * num_naive_runs, test_every))
    x = np.tile(x, nsims)
    xlabel = 'Distance travelled'
    ci = 50

    # test error
    errors = [np.stack(res).flatten() for res in error_results]
    dct_err = {'x': x}
    for y, lbl in zip(errors, variants):
        dct_err[lbl] = y
    df_err = pd.DataFrame.from_dict(dct_err)
    ylabel = 'Test MAE'
    generate_lineplots(df_err, x='x', xlabel=xlabel, ylabel=ylabel, legends=variants, ci=ci)

    # test variance
    dct_var = {'x': x}
    varss = [np.stack(res).flatten() for res in var_results]
    for y, lbl in zip(varss, variants):
        dct_var[lbl] = y
    df_var = pd.DataFrame.from_dict(dct_var)
    ylabel_var = 'Test Mean Variance'
    generate_lineplots(df_var, x='x', xlabel=xlabel, ylabel=ylabel_var, legends=variants, ci=ci)
    # drop into the debugger to inspect results interactively
    ipdb.set_trace()
import numpy as np
import matplotlib.pyplot as plt

from agent import Master, Agent
from arguments import get_args
from env import JuraEnv  # module path assumed; adjust to where JuraEnv is defined
from utils import compute_mae

if __name__ == '__main__':
    args = get_args()
    # Environment with Jura dataset
    env = JuraEnv()

    # gp model hyperparameters are learned by the master
    print('Learning Model Hyperparameters ... ')
    master = Master(env, args)

    # compute the MAE on the test set conditioning on all the training data
    mu = master.gp.predict(env.test_X, env.test_ind)
    mae = compute_mae(env.test_Y, mu)
    print('Ideal MAE: {:.3f}\n'.format(mae))

    # the agent copies the hyperparameters of the master's gp model and
    # performs active learning on the dataset
    agent = Agent(master)
    num_samples = 800
    predict_every = 50
    print('Active Learning ... ')
    all_mae = agent.learn(iterations=num_samples, predict_every=predict_every)

    # plot MAE vs iterations
    x = np.arange(predict_every, num_samples + 1, predict_every)
    plt.plot(x, np.array(all_mae))
    plt.xlabel('Iterations')
    plt.ylabel('MAE')
    plt.show()
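# For reference, compute_mae (imported from utils above) presumably reduces to
# the mean absolute error; a minimal sketch (the _sketch name is hypothetical):
import numpy as np

def compute_mae_sketch(y_true, y_pred):
    return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))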
# test_y = zero_mean_unit_variance(test_y, std=1)

# predict Cd at test locations given Cd, Ni and Zn at training locations
# and Ni and Zn at test locations
num_tasks = len(target_features)

# pair every training location with every task
# train_ind = [0, 1, ..., num_tasks-1, 0, 1, ...]
ind1 = np.arange(num_tasks).reshape(1, -1).repeat(len(train_x), 0).flatten()
# pair every test location with every task except the target task (0)
ind2 = np.arange(1, num_tasks).reshape(1, -1).repeat(len(test_x), 0).flatten()
x1 = np.repeat(train_x, num_tasks, 0)
x2 = np.repeat(test_x, num_tasks - 1, 0)
y1 = train_y.flatten()
y2 = test_y[:, 1:].flatten()

# effective training set
tx = np.concatenate([x1, x2])
ty = np.concatenate([y1, y2])
tind = np.concatenate([ind1, ind2])

# effective test set: predict the target task (0) at the test locations
test_ind = np.full(len(test_x), 0)

kernel_params = {'type': 'rbf'}
gp = HadamardMTGP(num_tasks=num_tasks, lr=.1, max_iter=300, kernel_params=kernel_params)
gp.fit(tx, tind, ty, disp=True)
mu = gp.predict(test_x, test_ind)
mae = compute_mae(test_y[:, 0], mu)
ipdb.set_trace()
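# A quick, self-contained illustration of the task-index layout built above,
# assuming three tasks (e.g. Cd, Ni, Zn), two training locations, and one test
# location (the sizes are made up purely for illustration):
import numpy as np

num_tasks, n_train, n_test = 3, 2, 1
ind1 = np.arange(num_tasks).reshape(1, -1).repeat(n_train, 0).flatten()
ind2 = np.arange(1, num_tasks).reshape(1, -1).repeat(n_test, 0).flatten()
print(ind1)  # [0 1 2 0 1 2] -> every task at every training location
print(ind2)  # [1 2]         -> only the auxiliary tasks at each test location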
def run_naive(self, std, counts, metric='distance'):
    # traverse each row from start to end in a naive (boustrophedon) manner
    # counts should be a list of ints
    # metric - either 'distance' or 'samples'
    test_error = []
    all_mi = []
    all_var = []
    for ns in counts:
        inds = []
        c = 0
        done = False
        while not done:
            # keep moving in the heading direction until reaching the end of a row,
            # then shift over to the next row
            next_pose = (self.pose[0] + self.heading[0], self.pose[1] + self.heading[1])
            ind = self.env.map_pose_to_gp_index_matrix[next_pose]
            if ind is not None:
                inds.append(ind)

            if metric == 'samples':
                if next_pose[0] == self.env.map.shape[0] - 1:
                    # bottom of a row: step across two columns and turn upward
                    poses = [next_pose, (next_pose[0], next_pose[1] + 1),
                             (next_pose[0], next_pose[1] + 2), (next_pose[0] - 1, next_pose[1] + 2)]
                    self.path = np.concatenate([self.path, poses], axis=0).astype(int)
                    self.heading = (-self.heading[0], 0)
                    self.pose = poses[-1]
                elif next_pose[0] == 0:
                    # top of a row: step across two columns and turn downward
                    poses = [next_pose, (next_pose[0], next_pose[1] + 1),
                             (next_pose[0], next_pose[1] + 2), (next_pose[0] + 1, next_pose[1] + 2)]
                    self.path = np.concatenate([self.path, poses], axis=0).astype(int)
                    self.heading = (-self.heading[0], 0)
                    self.pose = poses[-1]
                else:
                    self.path = np.concatenate([self.path, [next_pose]], axis=0).astype(int)
                    self.pose = next_pose
                done = len(inds) == ns
            elif metric == 'distance':
                c += 1
                self.path = np.concatenate([self.path, [next_pose]], axis=0).astype(int)
                if next_pose[0] == 0 and next_pose[1] % 2 == 0:
                    self.heading = (0, 1) if self.heading == (-1, 0) else (1, 0)
                elif next_pose[0] == self.env.map.shape[0] - 1 and next_pose[1] % 2 == 0:
                    self.heading = (0, 1) if self.heading == (1, 0) else (-1, 0)
                self.pose = next_pose
                done = c == ns
            else:
                raise NotImplementedError

        self._add_samples(inds, [std] * len(inds))
        mu, cov, mi = self.predict(return_cov=True, return_mi=True)
        error = compute_mae(self.env.test_Y, mu)
        test_error.append(error)
        all_mi.append(mi)
        all_var.append(np.diag(cov).mean())

    # TODO: implement simulation rendering
    var = np.diag(cov)
    strategy = 'Naive Static' if std == self.static_std else 'Naive Mobile'
    print('==========================================================')
    print('Strategy: ', strategy)
    print('--- Final statistics ---')
    print('Test ERROR: {:.4f}'.format(error))
    print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
    results = {'mean': mu, 'error': test_error, 'mi': all_mi, 'mean_var': all_var}
    return results
def run_ipp(self, render=False, num_runs=10, criterion='entropy', update=False, slack=0, strategy='MaxEnt', disp=True):
    # informative path planner
    assert strategy in ['MaxEnt', 'Shortest', 'Equi-Sample'], 'Unknown strategy!!'
    assert criterion in ['entropy', 'mutual_information'], 'Unknown criterion!!'
    self._setup_ipp(criterion, update)

    test_error = []
    for i in range(num_runs):
        if disp:
            print('\n==================================================================================================')
            print('Run {}/{}'.format(i+1, num_runs))
        run_start = time.time()

        # greedily select static samples
        new_gp_indices = self.greedy(self.num_samples_per_batch)
        waypoints = [tuple(self.env.gp_index_to_map_pose(x)) for x in new_gp_indices]
        next_static_locations = np.stack(waypoints)
        self.static_locations = np.concatenate([self.static_locations, next_static_locations]).astype(int)

        # Gather data along path
        if disp:
            print('------ Finding valid paths ---------')
            print('Pose:', self.pose, 'Heading:', self.heading, 'Waypoints:', waypoints)

        # find all paths
        start = time.time()
        least_cost_ub = self.env.get_heuristic_cost(self.pose, self.heading, waypoints)
        if disp:
            print('Least cost upper bound:', least_cost_ub)
        paths_checkpoints, paths_indices, paths_cost = self.env.get_all_paths(
            self.pose, self.heading, waypoints, least_cost_ub, slack)
        end = time.time()
        if disp:
            print('Number of feasible paths: ', len(paths_indices))
            print('Time consumed {:.4f}'.format(end - start))
            print('\n------ Finding best path ----------')

        # find optimal path
        start = time.time()
        if strategy == 'Shortest':
            best_idx = find_shortest_path(paths_cost)
        else:
            best_idx = self.best_path(paths_indices, new_gp_indices)
            if strategy == 'Equi-Sample':
                best_idx = find_equi_sample_path(paths_indices, best_idx)
        end = time.time()
        if disp:
            least_cost = min(paths_cost)
            print('Least cost: {} Best path cost: {}'.format(least_cost, paths_cost[best_idx]))
            print('Time consumed {:.4f}'.format(end - start))

        # update agent's record
        next_path = np.stack(self.env.get_path_from_checkpoints(paths_checkpoints[best_idx]))[1:]
        next_path_indices, stds = self.get_samples_sequence_from_path(next_path, waypoints)
        self.path = np.concatenate([self.path, next_path], axis=0).astype(int)
        self.pose = tuple(self.path[-1])
        self.heading = get_heading(self.path[-2], self.path[-1])

        if render:
            pred = self.predict(self.env.all_x).reshape(self.env.shape)
            # true = self.env.all_y.reshape(self.env.shape)
            # self.env.render(paths_checkpoints[best_idx], self.path, next_static_locations, self.static_locations, true, pred)
            self.env.render(paths_checkpoints[best_idx], self.path, next_static_locations, self.static_locations)

        # gather samples
        self._add_samples(next_path_indices, stds)

        # update hyperparameters of GP model
        # TODO: this may not work properly right now
        if update and (i+1) % self.update_every == 0:
            if disp:
                print('\n---------- Updating model --------------')
            start = time.time()
            self.update_model()
            self._post_update()
            end = time.time()
            if disp:
                print('Time consumed {:.4f}'.format(end - start))

        # predict on test set
        if disp:
            print('\n-------- Prediction --------------')
        start = time.time()
        pred, var = self.predict(return_var=True)
        error = compute_mae(self.env.test_Y, pred)
        test_error.append(error)
        end = time.time()
        if disp:
            print('Test ERROR: {:.4f}'.format(error))
            print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
            print('Time consumed {:.4f}'.format(end - start))

        run_end = time.time()
        if disp:
            print('\nTotal Time consumed in run {}: {:.4f}'.format(i+1, run_end - run_start))

    print('==========================================================')
    print('Strategy: {:s}'.format(strategy))
    print('--- Final statistics ---')
    print('Test ERROR: {:.4f}'.format(error))
    print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
    results = {'mean': pred, 'error': test_error}
    return results
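# get_heading is defined elsewhere in the repo. Since headings throughout this
# code are unit grid steps such as (0, 1) or (-1, 0), it presumably returns the
# step between two consecutive poses on the 4-connected grid; a minimal sketch
# under that assumption (the _sketch name is hypothetical):
def get_heading_sketch(prev_pose, pose):
    # unit step taken from prev_pose to pose
    return (int(pose[0] - prev_pose[0]), int(pose[1] - prev_pose[1]))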