def planner(self, T):
        ''' Gather noisy samples of the environment and update the robot's GP model.
        Input:
            T (int > 0): the length of the planning horizon (number of planning iterations) '''
        self.trajectory = []
        self.dist = 0

        for t in xrange(T):
            # Select the best trajectory according to the robot's acquisition function
            self.time = t
            print "[", t, "] Current Location:  ", self.loc, "Current Time:", self.time
            logger.info("[{}] Current Location: {} Current Time: {}".format(t, self.loc, self.time))

            # Let's figure out where the best point is in our world
            pred_loc, pred_val = self.predict_max()
            print "Current predicted max and value: \t", pred_loc, "\t", pred_val
            logger.info("Current predicted max and value: {} \t {}".format(
                pred_loc, pred_val))

            # If myopic, greedily select the next path using only the current GP model
            if not self.nonmyopic:
                sampling_path, best_path, best_val, all_paths, all_values, max_locs = self.choose_trajectory(
                    t=t)
            else:
                # Set the parameters required by the chosen reward function
                if self.f_rew == "naive" or self.f_rew == "naive_value":
                    param = (self.sample_num, self.sample_radius)
                elif self.f_rew == "exp_improve":
                    param = self.current_max
                else:
                    param = None

                # Create the tree search
                mcts = mctslib.cMCTS(self.comp_budget,
                                     self.GP,
                                     self.loc,
                                     self.roll_length,
                                     self.path_generator,
                                     self.aquisition_function,
                                     self.f_rew,
                                     t,
                                     aq_param=param,
                                     use_cost=self.use_cost,
                                     tree_type=self.tree_type)
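                # Run the search: returns the path to sample along, the best
                # path found and its value, all candidate paths and values,
                # and updated estimates of the maxima and the search target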
                sampling_path, best_path, best_val, all_paths, all_values, self.max_locs, self.max_val, self.target = mcts.choose_trajectory(
                    t=t)
            # If no valid trajectory was found, stop planning
            if best_path is None:
                break

            # Compute the distance traveled along the selected path
            start = self.loc
            for m in best_path:
                self.dist += np.sqrt((start[0] - m[0])**2 +
                                     (start[1] - m[1])**2)
                start = m

            # Update planner evaluation metrics
            self.eval.update_metrics(t=len(self.trajectory),
                                     robot_model=self.GP,
                                     all_paths=all_paths,
                                     selected_path=sampling_path,
                                     value=best_val,
                                     max_loc=pred_loc,
                                     max_val=pred_val,
                                     params=[
                                         self.current_max,
                                         self.current_max_loc, self.max_val,
                                         self.max_locs
                                     ],
                                     dist=self.dist)

            # Gather data along the selected path and update GP
            data = np.array(sampling_path)
            x1 = data[:, 0]
            x2 = data[:, 1]
            if self.dimension == 2:
                xlocs = np.vstack([x1, x2]).T
            elif self.dimension == 3:
                # Collect observations at the current time
                xlocs = np.vstack([x1, x2, t * np.ones(len(x1))]).T
            else:
                raise ValueError('Only 2D or 3D worlds supported!')

            self.collect_observations(xlocs)

            # If set, learn the kernel parameters from the new data
            if t < T / 3 and self.learn_params:
                self.GP.train_kernel()

            self.trajectory.append(best_path)

            # If set, update the visualization
            if self.create_animation:
                self.visualize_trajectory(screen=False,
                                          filename=t,
                                          best_path=sampling_path,
                                          maxes=self.max_locs,
                                          all_paths=all_paths,
                                          all_vals=all_values)
                self.visualize_reward(screen=True,
                                      filename='REWARD.' + str(t),
                                      t=t)

            # Update the robot's current location
            self.loc = sampling_path[-1]

        # Save the GP model's observed locations and values for post-analysis
        np.savetxt(
            './figures/' + self.f_rew + '/robot_model.csv',
            (self.GP.xvals[:, 0], self.GP.xvals[:, 1], self.GP.zvals[:, 0]))
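
# Example usage (a minimal sketch; the enclosing class, here called Robot, and
# its constructor arguments are assumed from the rest of the module and are not
# shown in this file):
#
#   robot = Robot(...)      # configure the GP model, path generator, reward, etc.
#   robot.planner(T=150)    # run 150 planning iterations
#
# The saved model can then be reloaded for analysis, e.g. with f_rew == 'exp_improve':
#
#   x1, x2, z = np.loadtxt('./figures/exp_improve/robot_model.csv')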