    def inst_regret(self, t, all_paths, selected_path, robot_model, param = None):
        ''' The instantaneous Kapoor regret of a selected path, according to the specified reward function
        Input:
            t (int): the current planning iteration
            all_paths (dict): the set of all available paths to the robot at time t
            selected_path: the path selected by the robot at time t
            robot_model (GP Model): the robot's current belief model
            param: if not None, use the MVES acquisition function in place of self.f_rew
        '''

        # Evaluate every available path under the current belief model
        value_omni = {}
        for path, points in all_paths.items():
            if param is None:
                value_omni[path] = self.f_rew(time = t, xvals = points, robot_model = robot_model)
            else:
                value_omni[path] = aqlib.mves(time = t, xvals = points, robot_model = robot_model, param = (self.max_val).reshape(1, 1))

        # The omniscient value is the reward of the best available path
        value_max = value_omni[max(value_omni, key = value_omni.get)]
        if param is None:
            value_selected = self.f_rew(time = t, xvals = selected_path, robot_model = robot_model)
        else:
            value_selected = aqlib.mves(time = t, xvals = selected_path, robot_model = robot_model, param = (self.max_val).reshape(1, 1))
        # Regret is the gap between the best path's value and the selected path's value
        return value_max - value_selected, value_selected, value_max

    def update_metrics(self, t, robot_model, all_paths, selected_path):
        ''' Function to update available metrics '''
        # Compute the acquisition function value of the selected path
        if (self.f_aqu == aqlib.mves):
            self.metrics['aquisition_function'][t] = self.f_aqu(
                t, selected_path, robot_model, [None])
        else:
            self.metrics['aquisition_function'][t] = self.f_aqu(
                t, selected_path, robot_model)

        # Compute reward functions
        self.metrics['mean_reward'][t] = self.mean_reward(
            t, selected_path, robot_model)
        self.metrics['info_gain_reward'][t] = self.info_gain_reward(
            t, selected_path, robot_model)
        self.metrics['hotspot_info_reward'][t] = self.hotspot_info_reward(
            t, selected_path, robot_model)
        self.metrics['mes_reward_robot'][t] = aqlib.mves(
            t, selected_path, robot_model, [None])
        # Compute other performance metrics
        self.metrics['MSE'][t] = self.MSE(robot_model, NTEST=25)
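
The regret returned by inst_regret above is just the gap between the best available path's value and the selected path's value. A standalone sketch of that computation, with hypothetical hardcoded rewards standing in for the GP-based self.f_rew / aqlib.mves evaluations:

# Illustrative only: fixed reward numbers stand in for GP-based reward evaluations.
value_omni = {'path_a': 3.0, 'path_b': 5.0, 'path_c': 4.5}
selected = 'path_c'

value_max = value_omni[max(value_omni, key=value_omni.get)]  # 5.0 (best path)
value_selected = value_omni[selected]                        # 4.5
regret = value_max - value_selected                          # 0.5
print(regret, value_selected, value_max)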
Example #3
# numpy and pandas are required by this example; gplib (a GP model library) and
# aqlib (an acquisition-function library) are assumed to be project-local modules
import numpy as np
import pandas as pd

def playback(playback_locs, playback_samples, max_val, column_names):
    ''' Gather noisy samples of the environment and update the robot's GP model
    Input:
        playback_locs (str): path to a whitespace-delimited log of robot locations
        playback_samples (str): path to a whitespace-delimited log of sample locations and values
        max_val: the maximum value of the environment, passed to the MVES acquisition function
        column_names (list): names for the fields of the robot-location log
    '''

    # Read the robot-location log (one field per line; transpose so fields become columns)
    d = playback_locs
    data = pd.read_table(d, delimiter=" ", header=None)
    data = data.T
    if data.shape[1] > len(column_names):
        # The log has extra trailing lines: re-read and drop the last two
        # (skipfooter requires the python parsing engine)
        data = pd.read_table(d, delimiter=" ", header=None, skipfooter=2, engine='python')
        data = data.T
    data.columns = column_names
    robot_loc = np.vstack((data['robot_loc_x'], data['robot_loc_y'])).T

    # Read the sample log: fields are the (x1, x2) sample locations and observed values z
    d = playback_samples
    data = pd.read_table(d, delimiter=" ", header=None)
    data = data.T
    data.columns = ['x1', 'x2', 'z']
    sample_loc = np.vstack((data['x1'], data['x2'])).T
    sample_val = data['z'].T

    # Initialize the robot's GP model with the initial kernel parameters
    extent = (0., 10., 0., 10.)  # workspace bounds (xmin, xmax, ymin, ymax)
    init_variance = 100.0        # kernel signal variance
    init_lengthscale = 1.0       # kernel lengthscale
    noise = 1.001                # observation noise variance
    GP = gplib.OnlineGPModel(ranges=extent,
                             lengthscale=init_lengthscale,
                             variance=init_variance,
                             noise=noise)

    t_sample_locs = {}  # sample locations gathered during each timestep
    t_sample_vals = {}  # corresponding sample values
    value_robot = []    # per-timestep MVES value of the gathered samples

    # Partition the flat sample stream into per-timestep chunks: the samples in
    # [S, E) belong to timestep t, where E is one past the sample matching the
    # robot's end location for that timestep
    S = 0
    E = 0
    for t, end_loc in enumerate(robot_loc[1:, :]):
        # Advance E until the sample location matches the robot's next stop point
        while (not np.isclose(sample_loc[E, 0], end_loc[0])
               or not np.isclose(sample_loc[E, 1], end_loc[1])):
            E += 1
        E += 1  # include the matching sample in this timestep's chunk

        t_sample_locs[t] = sample_loc[S:E, :]
        t_sample_vals[t] = np.array((sample_val[S:E])).astype('float')
        S = E
        E += 1  # begin searching for the next stop one sample further on

        # print("--------------", t, "-----------------")
        # print(t_sample_locs[t])
        # print(t_sample_vals[t])

        # Score the new samples with the MVES acquisition function, then fold
        # them into the GP belief
        value_robot.append(
            aqlib.mves(time=t,
                       xvals=t_sample_locs[t],
                       robot_model=GP,
                       param=(np.array(max_val)).reshape(1, 1)))
        GP.add_data(
            t_sample_locs[t],
            np.reshape(t_sample_vals[t], (t_sample_locs[t].shape[0], 1)))

    # Handle the final chunk of samples; t = 149 is the hard-coded last timestep
    t = 149
    t_sample_locs[t] = sample_loc[S:, :]
    t_sample_vals[t] = np.array((sample_val[S:])).astype('float')
    # print("--------------", t, "-----------------")
    # print(t_sample_locs[t])
    # print(t_sample_vals[t])

    # Score the final chunk; the GP model is not updated afterwards
    value_robot.append(
        aqlib.mves(time=t,
                   xvals=t_sample_locs[t],
                   robot_model=GP,
                   param=(np.array(max_val)).reshape(1, 1)))
    #GP.add_data(t_sample_locs[t], np.reshape(t_sample_vals[t], (t_sample_locs[t].shape[0], 1)))

    # Cumulative acquisition value accrued over the whole playback
    return np.cumsum(value_robot)
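
A hedged usage sketch for playback: the file names, column names, and max_val below are placeholders, not values from the original code. The location log's fields must include 'robot_loc_x' and 'robot_loc_y', and max_val should be the (known or estimated) environment maximum used by MVES.

# Hypothetical invocation; paths, column names, and max_val are placeholders.
column_names = ['time', 'robot_loc_x', 'robot_loc_y']
cumulative_value = playback(playback_locs='robot_log.txt',
                            playback_samples='sample_log.txt',
                            max_val=7.5,
                            column_names=column_names)
print(cumulative_value[-1])  # total MVES value accrued over the run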