Example 1
    def run_greedy_ipp(self, num_runs=10, criterion='entropy', strategy='MaxEnt', disp=True):
        self._setup_ipp(criterion)
        
        for i in range(num_runs):
            if disp:
                print('\n==================================================================================================')
                print('Run {}/{}'.format(i+1, num_runs))
            
            run_start = time.time()
            
            # greedily select static samples
            new_gp_indices = self.greedy(self.num_samples_per_batch)  
            waypoints = [tuple(self.env.gp_index_to_map_pose(x)) for x in new_gp_indices]
            next_static_locations = np.stack(waypoints)
            self.static_locations = np.concatenate([self.static_locations, next_static_locations]).astype(int)

            # Gather data along path 
            if disp:      
                print('------ Finding valid paths ---------')
                print('Pose:',self.pose, 'Heading:', self.heading, 'Waypoints:', waypoints)

            # move to the nearest waypoint
            costs, seq = self.env.map.nearest_waypoint_path_cost(self.pose, self.heading, waypoints, return_seq=True)
            # use j here so the outer run counter i is not shadowed
            for j in range(len(seq)):
                paths_checkpoints, paths_indices, paths_cost = self.env.get_all_paths(
                    self.pose, self.heading, [waypoints[seq[j]]], costs[j], slack=0)
                assert costs[j] == paths_cost[0], 'path costs do not match'
                
                # find optimal path
                if strategy == 'Shortest':
                    best_idx = find_shortest_path(paths_cost)
                else:
                    best_idx = self.best_path(paths_indices, [new_gp_indices[seq[j]]])
                    if strategy == 'Equi-Sample':
                        best_idx = find_equi_sample_path(paths_indices, best_idx)
                
                next_path = np.stack(self.env.get_path_from_checkpoints(paths_checkpoints[best_idx]))[1:]
                next_path_indices, stds = self.get_samples_sequence_from_path(next_path, waypoints)
                self.path = np.concatenate([self.path, next_path], axis=0).astype(int)
                self.pose = tuple(self.path[-1])
                self.heading = get_heading(self.path[-2], self.path[-1])
                                
                # gather samples
                self._add_samples(next_path_indices, stds)
            
            run_end = time.time()
            if disp:
                print('\nTotal Time consumed in run {}: {:.4f}'.format(i+1, run_end - run_start))

        pred, var = self.predict(return_var=True)
        error = compute_mae(self.env.test_Y, pred)
        print('==========================================================')
        print('Strategy: {:s}'.format(strategy))
        print('--- Final statistics --- ')
        print('Test ERROR: {:.4f}'.format(error))
        print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
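The self.greedy(...) call at the top of the loop is not shown above. A minimal sketch of greedy maximum-entropy selection, assuming a posterior_var callable that returns the predictive variance at every candidate index after conditioning on the points chosen so far (the callable and its signature are illustrative, not from the repo):

import numpy as np

def entropy_from_var(var):
    # differential entropy of a Gaussian: H = 0.5 * log(2*pi*e*var)
    return 0.5 * np.log(2 * np.pi * np.e * var)

def greedy_max_entropy(posterior_var, num_samples):
    chosen = []
    for _ in range(num_samples):
        utilities = entropy_from_var(posterior_var(chosen))
        utilities[chosen] = -np.inf  # never re-select a point
        chosen.append(int(np.argmax(utilities)))
    return chosen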
Example 2
    def learn(self, iterations, predict_every=5):
        # notations:
        # X - sampled set
        # X_{t} - sampled set of type t
        # V_{t} - entire sampling set of type t

        all_mae = []
        # select samples greedily with the highest conditional utility
        for j in range(iterations):
            # print('Iteration {:d}/{:d}'.format(j+1, iterations))

            utilities = np.full(self.env.num_samples, -np.inf)
            indices = np.arange(self.env.num_samples)

            # all remaining samples
            rem = indices[~self.sampled]
            x = self.env.X[rem]
            t = self.env.ind[rem]
            mu, var = self.gp.predict(x, t, return_var=True)
            ent = entropy_from_var(var)

            # all remaining samples of type t
            rem_t = indices[~self.sampled &
                            (self.env.ind == self.env.target_task)]
            x_t = self.env.X[rem_t]
            t_t = np.full(len(x_t), self.env.target_task)
            mu_t, var_t = self.sec_gp.predict(x_t, t_t, return_var=True)
            ent_t = entropy_from_var(var_t)

            utilities[rem] = ent
            utilities[rem_t] -= ent_t

            best_idx = np.argmax(utilities)
            self.sampled[best_idx] = True

            # modify train data of main gp
            self.update_model(gp='main')

            # modify train data of secondary gp if chosen type is different from target type
            if self.env.ind[best_idx] != self.env.target_task:
                self.update_model(gp='secondary')

            if (j + 1) % predict_every == 0:
                mu = self.gp.predict(self.env.test_X, self.env.test_ind)
                mae = compute_mae(self.env.test_Y, mu)
                all_mae.append(mae)
                print('Iteration: {:d}/{:d} Test MAE: {:.3f}'.format(
                    j + 1, iterations, mae))
        return all_mae
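Reading off the two utility assignments above, each remaining candidate x with task t(x) is scored by the acquisition (a reconstruction of the code's logic, not a formula taken from the repo):

    u(x) = H\left(y_x \mid \mathcal{D}\right) - \mathbb{1}\{t(x) = t^\ast\}\, H\left(y_x \mid \mathcal{D}'\right)

where D is the main GP's training data and D' is the secondary GP's training data, which, by the update rule above, contains only non-target-task samples.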
Example 3
    def prediction_vs_distance(self, test_every, num_runs):
        count = 0
        all_error = []
        all_mi = []
        all_var = []

        while count < test_every*num_runs:
            count += test_every
            inds = np.array(self.collected['ind'][:count])
            valid = inds != -1

            x = self.env.X[inds[valid]]
            var = np.array(self.collected['std'])[:count][valid]**2
            y = np.array(self.collected['y'])[:count][valid]
            mu, cov, mi = predictive_distribution(self.gp, x, y, self.env.test_X, var, return_mi=True, return_cov=True)            

            error = compute_mae(self.env.test_Y, mu)
            all_error.append(error)
            all_mi.append(mi)
            all_var.append(np.diag(cov).mean())
        # 'mean' holds the posterior mean from the final evaluation point
        results = {'mean': mu, 'error': all_error, 'mi': all_mi, 'mean_var': all_var}
        return results
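compute_mae is imported from utils and not shown anywhere in these examples; a one-line sketch under the usual definition of mean absolute error:

import numpy as np

def compute_mae(y_true, y_pred):
    # mean absolute error between targets and predictions
    return np.abs(np.asarray(y_true) - np.asarray(y_pred)).mean()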
Example 4
File: run.py Project: sumitsk/algp
def compare_all_strategies(args):
    # compare all 5 strategies on the same environment
    strategies = [
        'MaxEnt', 'Shortest', 'Equi-Sample', 'Naive Static', 'Naive Mobile'
    ]
    ipp_strategies = ['MaxEnt', 'Shortest', 'Equi-Sample']
    naive_strategies = ['Naive Static', 'Naive Mobile']
    num_strategies = len(strategies)

    nsims = 10
    test_every = 10
    num_naive_runs = 20
    max_dist = test_every * num_naive_runs
    disp = False
    # set some initial samples
    initial_samples = 5

    error_results = [[] for _ in range(num_strategies)]
    mi_results = [[] for _ in range(num_strategies)]
    var_results = [[] for _ in range(num_strategies)]
    sample_count = [[] for _ in range(num_strategies)]
    noise_ratio = 5
    for t in range(nsims):
        env = FieldEnv(data_file=args.data_file,
                       phenotype=args.phenotype,
                       num_test=args.num_test)
        master = Agent(env, args, static_std=args.static_std)
        master.reset()
        master.pilot_survey(num_samples=initial_samples, std=master.static_std)
        mu, cov, zero_mi = master.predict(x=env.test_X,
                                          return_cov=True,
                                          return_mi=True)
        zero_error = compute_mae(mu, env.test_Y)
        zero_mean_var = np.diag(cov).mean()

        # It is not necessary to make separate agents but is useful for debugging purposes
        agents = [
            Agent(env,
                  args,
                  parent_agent=master,
                  static_std=args.static_std,
                  mobile_std=noise_ratio * args.static_std)
            for _ in range(num_strategies)
        ]

        for i in range(num_strategies):
            if strategies[i] in ipp_strategies:
                # res = agents[i].run_ipp(num_runs=args.num_runs, strategy=strategies[i], disp=disp)
                # run the planner first, then re-evaluate prediction quality
                # as a function of distance travelled
                agents[i].run_greedy_ipp(num_runs=args.num_runs,
                                         strategy=strategies[i],
                                         disp=disp)
                res = agents[i].prediction_vs_distance(test_every=test_every,
                                                       num_runs=num_naive_runs)
            elif strategies[i] in naive_strategies:
                std = master.static_std if 'Static' in strategies[
                    i] else master.mobile_std
                res = agents[i].run_naive(std=std,
                                          counts=[test_every] * num_naive_runs,
                                          metric='distance')
            else:
                raise NotImplementedError
            error_results[i].append([zero_error] + res['error'])
            mi_results[i].append([zero_mi] + res['mi'])
            var_results[i].append([zero_mean_var] + res['mean_var'])
            sample_count[i].append(
                path_to_sample_count(env, agents[i].path)[:max_dist])
    start = test_every
    x = [initial_samples] + list(
        np.arange(start, start + test_every * num_naive_runs, test_every))
    # x = np.stack([x for _ in range(nsims)]).flatten()
    x = np.tile(x, nsims)
    xlabel = 'Distance travelled'
    ci = 50

    # test error
    errors = [np.stack(res).flatten() for res in error_results]
    dct_err = {'x': x}
    for y, lbl in zip(errors, strategies):
        dct_err[lbl] = y
    df_err = pd.DataFrame.from_dict(dct_err)

    ylabel = 'Test MAE'
    generate_lineplots(df_err,
                       x='x',
                       xlabel=xlabel,
                       ylabel=ylabel,
                       legends=strategies,
                       ci=ci)

    # sample_count vs distance
    all_sample_count = [np.stack(sc).flatten() for sc in sample_count]
    dist = np.tile(np.arange(1, 1 + max_dist), nsims)
    dct_sc = {'x': dist}
    for y, lbl in zip(all_sample_count, strategies):
        dct_sc[lbl] = y
    df_sc = pd.DataFrame.from_dict(dct_sc)

    ylabel_sc = 'Number of samples'
    generate_lineplots(df_sc,
                       x='x',
                       xlabel=xlabel,
                       ylabel=ylabel_sc,
                       legends=strategies,
                       ci=ci)

    # ipdb.set_trace()  # debugger breakpoint for inspecting results
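path_to_sample_count is not shown in these examples either; a plausible reconstruction, consistent with how its output is plotted against distance above (cumulative count of valid sample locations after each step along the path; treat this as an assumption, not the repo's implementation):

import numpy as np

def path_to_sample_count(env, path):
    # cumulative number of poses on the path that map to a valid GP index
    counts, total = [], 0
    for pose in path:
        if env.map_pose_to_gp_index_matrix[tuple(pose)] is not None:
            total += 1
        counts.append(total)
    return np.array(counts)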
Example 5
File: run.py Project: sumitsk/algp
def compare_maxent(args):
    nsims = 10

    test_every = 10
    num_naive_runs = 25
    disp = False
    # set some initial samples
    initial_samples = 5
    # noise_ratios = [1,2,5,10]
    # variants = ['test_every = ' + str(n) for n in noise_ratios]

    slacks = [0, 5, 10, 15]
    variants = ['slack = ' + str(s) for s in slacks]

    nv = len(variants)
    error_results = [[] for _ in range(nv)]
    mi_results = [[] for _ in range(nv)]
    var_results = [[] for _ in range(nv)]

    for t in range(nsims):
        env = FieldEnv(data_file=args.data_file,
                       phenotype=args.phenotype,
                       num_test=args.num_test)
        master = Agent(env, args, static_std=args.static_std)
        master.reset()
        master.pilot_survey(num_samples=initial_samples, std=master.static_std)
        mu, cov, zero_mi = master.predict(x=env.test_X,
                                          return_cov=True,
                                          return_mi=True)
        zero_error = compute_mae(mu, env.test_Y)
        zero_mean_var = np.diag(cov).mean()

        # It is not necessary to make separate agents but is useful for debugging purposes
        # agents = [Agent(env, args, parent_agent=master, static_std=args.static_std, mobile_std=kappa*args.static_std) for kappa in noise_ratios]
        agents = [
            Agent(env,
                  args,
                  parent_agent=master,
                  static_std=args.static_std,
                  mobile_std=5 * args.static_std) for _ in range(nv)
        ]

        for i in range(nv):
            # run the MaxEnt planner with the given slack, then re-evaluate
            # prediction quality as a function of distance travelled
            agents[i].run_ipp(num_runs=args.num_runs,
                              strategy='MaxEnt',
                              disp=disp,
                              slack=slacks[i])
            # res = agents[i].run_ipp(num_runs=args.num_runs, strategy='MaxEnt', disp=disp, slack=0)
            res = agents[i].prediction_vs_distance(test_every=test_every,
                                                   num_runs=num_naive_runs)
            error_results[i].append([zero_error] + res['error'])
            mi_results[i].append([zero_mi] + res['mi'])
            var_results[i].append([zero_mean_var] + res['mean_var'])

    start = test_every
    x = [initial_samples] + list(
        np.arange(start, start + test_every * num_naive_runs, test_every))
    x = np.tile(x, nsims)
    xlabel = 'Distance travelled'
    ci = 50

    # test error
    errors = [np.stack(res).flatten() for res in error_results]
    dct_err = {'x': x}
    for y, lbl in zip(errors, variants):
        dct_err[lbl] = y
    df_err = pd.DataFrame.from_dict(dct_err)

    ylabel = 'Test MAE'
    generate_lineplots(df_err,
                       x='x',
                       xlabel=xlabel,
                       ylabel=ylabel,
                       legends=variants,
                       ci=ci)

    # test variance
    dct_var = {'x': x}
    varss = [np.stack(res).flatten() for res in var_results]
    for y, lbl in zip(varss, variants):
        dct_var[lbl] = y
    df_var = pd.DataFrame.from_dict(dct_var)
    ylabel_var = 'Test Mean Variance'
    generate_lineplots(df_var,
                       x='x',
                       xlabel=xlabel,
                       ylabel=ylabel_var,
                       legends=variants,
                       ci=ci)
    # ipdb.set_trace()  # debugger breakpoint for inspecting results
Example 6
import numpy as np
import matplotlib.pyplot as plt

from agent import Master, Agent
from arguments import get_args
from env import JuraEnv  # module providing JuraEnv (path assumed)
from utils import compute_mae

if __name__ == '__main__':
    args = get_args()

    # Environment with Jura dataset
    env = JuraEnv()

    # gp model hyperparameters are learned by the master
    print('Learning Model Hyperparameters ... ')
    master = Master(env, args)
    # compute the MAE on the test set conditioning on all the training data
    mu = master.gp.predict(env.test_X, env.test_ind)
    mae = compute_mae(env.test_Y, mu)
    print('Ideal MAE: {:.3f}\n'.format(mae))

    # agent copies the hyperparameters of the master's gp model and performs active learning on the dataset
    agent = Agent(master)
    num_samples = 800
    predict_every = 50
    print('Active Learning ... ')
    all_mae = agent.learn(iterations=num_samples, predict_every=predict_every)

    # plot MAE v/s iterations
    x = np.arange(predict_every, num_samples + 1, predict_every)
    plt.ylabel('MAE')
    plt.xlabel('Iterations')
    plt.plot(x, np.array(all_mae))
    plt.show()
Example 7
# test_y = zero_mean_unit_variance(test_y, std=1)

# predict Cd at test locations given Cd, Ni and Zn at training locations and Ni and Zn at test locations
num_tasks = len(target_features)

# repeat train_x and train_y for all target features
# ind1 = [0, 1, ..., T-1, 0, 1, ..., T-1, ...] pairs each repeated location
# in x1 = [x_0, ..., x_0, x_1, ..., x_1, ...] with every task index
ind1 = np.arange(num_tasks).reshape(1, -1).repeat(len(train_x), 0).flatten()
ind2 = np.arange(1, num_tasks).reshape(1, -1).repeat(len(test_x), 0).flatten()
x1 = np.repeat(train_x, num_tasks, 0)
x2 = np.repeat(test_x, num_tasks - 1, 0)
y1 = train_y.flatten()
y2 = test_y[:, 1:].flatten()

# effective training set
tx = np.concatenate([x1, x2])
ty = np.concatenate([y1, y2])
tind = np.concatenate([ind1, ind2])

# effective test set
test_ind = np.full(len(test_x), 0)

kernel_params = {'type': 'rbf'}
gp = HadamardMTGP(num_tasks=num_tasks,
                  lr=.1,
                  max_iter=300,
                  kernel_params=kernel_params)
gp.fit(tx, tind, ty, disp=True)
mu = gp.predict(test_x, test_ind)
mae = compute_mae(test_y[:, 0], mu)
# ipdb.set_trace()  # debugger breakpoint for inspecting results
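HadamardMTGP is imported from elsewhere and not shown. The defining property of a Hadamard multi-task GP is a covariance that factorizes into an input kernel times a task covariance, evaluated at paired (x, task) points. A minimal sketch of that covariance, assuming an RBF input kernel with a single lengthscale and a free-form task covariance matrix B (both are assumptions, not the repo's parametrization):

import numpy as np

def hadamard_kernel(X1, ind1, X2, ind2, lengthscale, B):
    # k((x, i), (x', j)) = k_rbf(x, x') * B[i, j]
    sq_dist = ((X1[:, None, :] - X2[None, :, :]) ** 2).sum(-1)
    k_input = np.exp(-0.5 * sq_dist / lengthscale ** 2)
    k_task = B[np.asarray(ind1)[:, None], np.asarray(ind2)[None, :]]
    return k_input * k_task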
Example 8
    def run_naive(self, std, counts, metric='distance'):
        # traverse each row from start to end in a naive (serpentine) manner
        # counts: a list of ints, one entry per evaluation batch
        # metric: either 'distance' or 'samples'

        test_error = []
        all_mi = []
        all_var = []

        for ns in counts:
            inds = []
            c = 0
            done = False
            while not done:
                # keep moving along the heading until the end of the row is reached and a shift to the next row is needed
                next_pose = (self.pose[0]+self.heading[0], self.pose[1]+self.heading[1])
                ind = self.env.map_pose_to_gp_index_matrix[next_pose]
                if ind is not None:
                    inds.append(ind)

                if metric == 'samples':
                    if next_pose[0] == self.env.map.shape[0] - 1:
                        poses = [next_pose, (next_pose[0], next_pose[1]+1), (next_pose[0], next_pose[1]+2), (next_pose[0]-1, next_pose[1]+2)]
                        self.path = np.concatenate([self.path, poses], axis=0).astype(int)
                        self.heading = (-self.heading[0], 0)                       
                        self.pose = poses[-1]
                        
                    elif next_pose[0] == 0:
                        poses = [next_pose, (next_pose[0], next_pose[1]+1), (next_pose[0], next_pose[1]+2), (next_pose[0]+1, next_pose[1]+2)]
                        self.path = np.concatenate([self.path, poses], axis=0).astype(int)
                        self.heading = (-self.heading[0], 0)
                        self.pose = poses[-1]

                    else:
                        self.path = np.concatenate([self.path, [next_pose]], axis=0).astype(int)
                        self.pose = next_pose

                    done = len(inds)==ns

                elif metric == 'distance':
                    c += 1
                    self.path = np.concatenate([self.path, [next_pose]], axis=0).astype(int)
                    if next_pose[0]==0 and next_pose[1]%2==0:
                        self.heading = (0,1) if self.heading==(-1,0) else (1,0)
                    elif next_pose[0]==self.env.map.shape[0]-1 and next_pose[1]%2==0:
                        self.heading = (0,1) if self.heading==(1,0) else (-1,0)
                    self.pose = next_pose
                    done = c==ns
                else:
                    raise NotImplementedError

            self._add_samples(inds, [std]*len(inds))
            mu, cov, mi = self.predict(return_cov=True, return_mi=True)
            error = compute_mae(self.env.test_Y, mu)
            test_error.append(error)
            all_mi.append(mi)
            all_var.append(np.diag(cov).mean())

            # TODO: implement simulation rendering 

        var = np.diag(cov)
        strategy = 'Naive Static' if std==self.static_std else 'Naive Mobile'
        print('==========================================================')
        print('Strategy: ', strategy)
        print('--- Final statistics --- ')
        print('Test ERROR: {:.4f}'.format(error))
        print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
        results = {'mean': mu, 'error': test_error, 'mi': all_mi, 'mean_var': all_var}
        return results
Example 9
    def run_ipp(self, render=False, num_runs=10, criterion='entropy', update=False, slack=0, strategy='MaxEnt', disp=True):
        # informative path planner
        assert strategy in ['MaxEnt', 'Shortest', 'Equi-Sample'], 'Unknown strategy!!'
        assert criterion in ['entropy', 'mutual_information'], 'Unknown criterion!!'
        self._setup_ipp(criterion, update)

        test_error = []

        for i in range(num_runs):
            if disp:
                print('\n==================================================================================================')
                print('Run {}/{}'.format(i+1, num_runs))
            
            run_start = time.time()
            
            # greedily select static samples
            new_gp_indices = self.greedy(self.num_samples_per_batch)  
            waypoints = [tuple(self.env.gp_index_to_map_pose(x)) for x in new_gp_indices]
            next_static_locations = np.stack(waypoints)
            self.static_locations = np.concatenate([self.static_locations, next_static_locations]).astype(int)

            # Gather data along path 
            if disp:      
                print('------ Finding valid paths ---------')
                print('Pose:',self.pose, 'Heading:', self.heading, 'Waypoints:', waypoints)
            
            # find all paths 
            start = time.time()
            least_cost_ub = self.env.get_heuristic_cost(self.pose, self.heading, waypoints)
            if disp:
                print('Least cost upper bound:',least_cost_ub)
            paths_checkpoints, paths_indices, paths_cost = self.env.get_all_paths(self.pose, self.heading, waypoints, least_cost_ub, slack)
            end = time.time()
            if disp:
                print('Number of feasible paths: ', len(paths_indices))
                print('Time consumed {:.4f}'.format(end - start))
                print('\n------ Finding best path ----------')
            
            # find optimal path
            start = time.time()
            if strategy == 'Shortest':
                best_idx = find_shortest_path(paths_cost)
            else:
                best_idx = self.best_path(paths_indices, new_gp_indices)
                if strategy == 'Equi-Sample':
                    best_idx = find_equi_sample_path(paths_indices, best_idx)
            end = time.time()

            if disp:
                least_cost = min(paths_cost)
                print('Least cost: {} Best path cost: {}'.format(least_cost, paths_cost[best_idx]))
                print('Time consumed {:.4f}'.format(end - start))
            
            # update agent's record
            next_path = np.stack(self.env.get_path_from_checkpoints(paths_checkpoints[best_idx]))[1:]
            next_path_indices, stds = self.get_samples_sequence_from_path(next_path, waypoints)
            self.path = np.concatenate([self.path, next_path], axis=0).astype(int)
            self.pose = tuple(self.path[-1])
            self.heading = get_heading(self.path[-2], self.path[-1])
            
            if render:
                pred = self.predict(self.env.all_x).reshape(self.env.shape)
                # true = self.env.all_y.reshape(self.env.shape)
                # self.env.render(paths_checkpoints[best_idx], self.path, next_static_locations, self.static_locations, true, pred)
                self.env.render(paths_checkpoints[best_idx], self.path, next_static_locations, self.static_locations)

            # gather samples
            self._add_samples(next_path_indices, stds)
            
            # update hyperparameters of GP model
            # TODO: this may not work properly right now
            if update and (i+1) % self.update_every == 0:
                if disp:
                    print('\n---------- Updating model --------------')
                start = time.time()
                self.update_model()
                self._post_update()
                end = time.time()
                if disp:
                    print('Time consumed {:.4f}'.format(end - start))

            # predict on test set
            if disp:
                print('\n-------- Prediction -------------- ')
            start = time.time()
            pred, var = self.predict(return_var=True)
            error = compute_mae(self.env.test_Y, pred)
            test_error.append(error)
            end = time.time()
            if disp:
                print('Test ERROR: {:.4f}'.format(error))
                print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
                print('Time consumed {:.4f}'.format(end - start))

            run_end = time.time()
            if disp:
                print('\nTotal Time consumed in run {}: {:.4f}'.format(i+1, run_end - run_start))

        print('==========================================================')
        print('Strategy: {:s}'.format(strategy))
        print('--- Final statistics --- ')
        print('Test ERROR: {:.4f}'.format(error))
        print('Predictive Variance Max: {:.3f} Min: {:.3f} Mean: {:.3f}'.format(var.max(), var.min(), var.mean()))
        results = {'mean': pred, 'error': test_error}
        return results
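find_shortest_path and get_heading are imported helpers not shown in any of these examples; below are sketches consistent with their call sites (semantics inferred from usage, so treat both as assumptions):

import numpy as np

def find_shortest_path(paths_cost):
    # index of the lowest-cost feasible path
    return int(np.argmin(paths_cost))

def get_heading(prev_pose, pose):
    # unit step taken between the last two poses on the path
    return (int(pose[0] - prev_pose[0]), int(pose[1] - prev_pose[1]))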