def planner(self, T):
    ''' Gather noisy samples of the environment and update the robot's GP model.
    Input:
        T (int > 0): the length of the planning horizon (number of planning iterations) '''
    self.trajectory = []
    self.dist = 0

    for t in xrange(T):
        # Select the best trajectory according to the robot's acquisition function
        self.time = t
        print "[", t, "] Current Location: ", self.loc, "Current Time:", self.time
        logger.info("[{}] Current Location: {}".format(t, self.loc))

        # Figure out where the predicted best point in the world is
        pred_loc, pred_val = self.predict_max()
        print "Current predicted max and value: \t", pred_loc, "\t", pred_val
        logger.info("Current predicted max and value: {} \t {}".format(pred_loc, pred_val))

        if not self.nonmyopic:
            # Myopic planner: greedily select the next trajectory
            sampling_path, best_path, best_val, all_paths, all_values, max_locs = self.choose_trajectory(t=t)
        else:
            # Set the parameters of the acquisition function
            if self.f_rew == "naive" or self.f_rew == "naive_value":
                param = (self.sample_num, self.sample_radius)
            elif self.f_rew == "exp_improve":
                param = self.current_max
            else:
                param = None

            # Create the tree search and select the next trajectory
            mcts = mctslib.cMCTS(self.comp_budget, self.GP, self.loc, self.roll_length,
                                 self.path_generator, self.aquisition_function, self.f_rew,
                                 t, aq_param=param, use_cost=self.use_cost,
                                 tree_type=self.tree_type)
            sampling_path, best_path, best_val, all_paths, all_values, \
                self.max_locs, self.max_val, self.target = mcts.choose_trajectory(t=t)

        # Stop planning if no valid trajectory was found; checked before the
        # distance computation below, which would fail on a None path
        if best_path is None:
            break

        # Update eval metrics: compute the distance traveled along the selected path
        start = self.loc
        for m in best_path:
            self.dist += np.sqrt((start[0] - m[0])**2 + (start[1] - m[1])**2)
            start = m

        # Update the planner evaluation metrics
        self.eval.update_metrics(t=len(self.trajectory), robot_model=self.GP,
                                 all_paths=all_paths, selected_path=sampling_path,
                                 value=best_val, max_loc=pred_loc, max_val=pred_val,
                                 params=[self.current_max, self.current_max_loc,
                                         self.max_val, self.max_locs],
                                 dist=self.dist)

        # Gather data along the selected path and update the GP
        data = np.array(sampling_path)
        x1 = data[:, 0]
        x2 = data[:, 1]
        if self.dimension == 2:
            xlocs = np.vstack([x1, x2]).T
        elif self.dimension == 3:
            # Collect observations at the current time
            xlocs = np.vstack([x1, x2, t * np.ones(len(x1))]).T
        else:
            raise ValueError('Only 2D or 3D worlds supported!')
        self.collect_observations(xlocs)

        # If set, learn the kernel parameters from the new data
        if t < T / 3 and self.learn_params:
            self.GP.train_kernel()

        self.trajectory.append(best_path)

        # If set, update the visualization
        if self.create_animation:
            self.visualize_trajectory(screen=False, filename=t, best_path=sampling_path,
                                      maxes=self.max_locs, all_paths=all_paths,
                                      all_vals=all_values)
            self.visualize_reward(screen=True, filename='REWARD.' + str(t), t=t)

        # Update the robot's current location
        self.loc = sampling_path[-1]

    # Save the robot's learned model of the world
    np.savetxt('./figures/' + self.f_rew + '/robot_model.csv',
               (self.GP.xvals[:, 0], self.GP.xvals[:, 1], self.GP.zvals[:, 0]))
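# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original file): one plausible way
# to drive planner(), assuming a host class -- called `Robot` here, which is
# hypothetical -- that supplies the attributes referenced above (GP, loc,
# path_generator, aquisition_function, eval, etc.). The constructor arguments
# shown are assumptions for illustration only.
#
#   robot = Robot(f_rew='mes', nonmyopic=True, create_animation=False)
#   robot.planner(T=150)    # run 150 planning iterations
#   print robot.dist        # total distance traveled over the mission
# ---------------------------------------------------------------------------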