def inst_regret(self, t, all_paths, selected_path, robot_model, param=None):
    ''' The instantaneous Kapoor regret of a selected path, according to the
    specified reward function
    Input:
        all_paths: the set of all available paths to the robot at time t
        selected_path: the path selected by the robot at time t
        robot_model (GP Model)
    '''
    # Evaluate every available path under the omniscient reward
    value_omni = {}
    for path, points in all_paths.items():
        if param is None:
            value_omni[path] = self.f_rew(time=t, xvals=points, robot_model=robot_model)
        else:
            value_omni[path] = aqlib.mves(time=t, xvals=points, robot_model=robot_model,
                                          param=(self.max_val).reshape(1, 1))
    value_max = value_omni[max(value_omni, key=value_omni.get)]

    # Evaluate the path the robot actually selected
    if param is None:
        value_selected = self.f_rew(time=t, xvals=selected_path, robot_model=robot_model)
    else:
        value_selected = aqlib.mves(time=t, xvals=selected_path, robot_model=robot_model,
                                    param=(self.max_val).reshape(1, 1))

    return value_max - value_selected, value_selected, value_max
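# Illustrative sketch (not part of the original module): with a toy reward function
# standing in for self.f_rew, the regret is simply the gap between the best-valued
# path and the selected one. The names below (toy_rew, all_paths) are hypothetical.
#
#   toy_rew = lambda time, xvals, robot_model: float(np.sum(xvals))
#   all_paths = {0: np.array([[0., 0.], [0., 1.]]), 1: np.array([[1., 1.], [2., 2.]])}
#   values = {p: toy_rew(time=0, xvals=pts, robot_model=None) for p, pts in all_paths.items()}
#   best = values[max(values, key=values.get)]    # value of the omniscient path (6.0)
#   regret = best - toy_rew(time=0, xvals=all_paths[0], robot_model=None)    # 5.0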
def update_metrics(self, t, robot_model, all_paths, selected_path):
    ''' Function to update available metrics '''
    # Compute the acquisition function (MVES takes an extra param argument)
    if self.f_aqu == aqlib.mves:
        self.metrics['aquisition_function'][t] = self.f_aqu(t, selected_path, robot_model, [None])
    else:
        self.metrics['aquisition_function'][t] = self.f_aqu(t, selected_path, robot_model)

    # Compute reward functions
    self.metrics['mean_reward'][t] = self.mean_reward(t, selected_path, robot_model)
    self.metrics['info_gain_reward'][t] = self.info_gain_reward(t, selected_path, robot_model)
    self.metrics['hotspot_info_reward'][t] = self.hotspot_info_reward(t, selected_path, robot_model)
    self.metrics['mes_reward_robot'][t] = aqlib.mves(t, selected_path, robot_model, [None])

    # Compute other performance metrics
    self.metrics['MSE'][t] = self.MSE(robot_model, NTEST=25)
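# Illustrative usage (a hedged sketch; 'evaluator', 'planner', and the pre-allocated
# per-metric dictionaries are assumptions about the surrounding experiment loop):
#
#   for t in range(T):
#       all_paths = planner.get_paths(robot_pose)
#       selected_path = planner.select_path(all_paths, robot_model)
#       evaluator.update_metrics(t, robot_model, all_paths, selected_path)
#   mse_over_time = [evaluator.metrics['MSE'][t] for t in range(T)]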
def playback(playback_locs, playback_samples, max_val, column_names):
    ''' Replay logged robot locations and samples, updating the robot's GP model
    and accumulating the MES reward at each planning iteration
    Input:
        playback_locs: path to the logged robot location file
        playback_samples: path to the logged sample file (x1, x2, z)
        max_val: the maximum value of the environment, used by the MES reward
        column_names: column names for the robot location log
    '''
    # Read the logged robot locations
    d = playback_locs
    data = pd.read_table(d, delimiter=" ", header=None)
    data = data.T
    if data.shape[1] > len(column_names):
        data = pd.read_table(d, delimiter=" ", header=None, skipfooter=2)
        data = data.T
    data.columns = column_names
    robot_loc = np.vstack((data['robot_loc_x'], data['robot_loc_y'])).T

    # Read the logged samples
    d = playback_samples
    data = pd.read_table(d, delimiter=" ", header=None)
    data = data.T
    data.columns = ['x1', 'x2', 'z']
    sample_loc = np.vstack((data['x1'], data['x2'])).T
    sample_val = data['z'].T

    # Initialize the robot's GP model with the initial kernel parameters
    extent = (0., 10., 0., 10.)
    init_variance = 100.0
    init_lengthscale = 1.0
    noise = 1.001
    GP = gplib.OnlineGPModel(ranges=extent, lengthscale=init_lengthscale,
                             variance=init_variance, noise=noise)

    t_sample_locs = {}
    t_sample_vals = {}
    value_robot = []

    S = 0
    E = 0
    for t, end_loc in enumerate(robot_loc[1:, :]):
        # Advance to the next stop point in the sample stream
        while (not np.isclose(sample_loc[E, 0], end_loc[0]) or
               not np.isclose(sample_loc[E, 1], end_loc[1])):
            E += 1
        E += 1

        t_sample_locs[t] = sample_loc[S:E, :]
        t_sample_vals[t] = np.array((sample_val[S:E])).astype('float')
        S = E
        E += 1

        # Score the samples gathered this iteration, then fold them into the GP
        value_robot.append(
            aqlib.mves(time=t, xvals=t_sample_locs[t], robot_model=GP,
                       param=(np.array(max_val)).reshape(1, 1)))
        GP.add_data(t_sample_locs[t],
                    np.reshape(t_sample_vals[t], (t_sample_locs[t].shape[0], 1)))

    # Score the remaining samples at the final iteration
    t = 149
    t_sample_locs[t] = sample_loc[S:, :]
    t_sample_vals[t] = np.array((sample_val[S:])).astype('float')
    value_robot.append(
        aqlib.mves(time=t, xvals=t_sample_locs[t], robot_model=GP,
                   param=(np.array(max_val)).reshape(1, 1)))
    #GP.add_data(t_sample_locs[t], np.reshape(t_sample_vals[t], (t_sample_locs[t].shape[0], 1)))

    return np.cumsum(value_robot)
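# Example invocation (a hedged sketch; the file names and the column list describe an
# assumed log schema, not one defined in this module):
#
#   column_names = ['time', 'robot_loc_x', 'robot_loc_y', 'robot_loc_a']
#   cumulative_mes = playback('robot_locations.log', 'robot_samples.log',
#                             max_val=25.0, column_names=column_names)
#   # cumulative_mes[t] is the cumulative MES reward through planning iteration t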