Example #1
  def __init__(self, env_params, lattice_type, lattice_params, cost_fn, learner_params):
    
    self.env_params = env_params
    self.cost_fn = cost_fn
    self.lattice_type = lattice_type
    if lattice_type == "XY":
      self.lattice = XYAnalyticLattice(lattice_params)
      self.start_n = self.lattice.state_to_node((lattice_params['x_lims'][0], lattice_params['y_lims'][0]))
      self.goal_n = self.lattice.state_to_node((lattice_params['x_lims'][1]-1, lattice_params['y_lims'][1]-1))

    
    elif lattice_type == "XYH":
      self.lattice = XYHAnalyticLattice(lattice_params)
      self.start_n = self.lattice.state_to_node((lattice_params['x_lims'][0], lattice_params['y_lims'][0], 0))
      self.goal_n = self.lattice.state_to_node((lattice_params['x_lims'][1]-1, lattice_params['y_lims'][1]-1, 0))
    
    self.lattice.precalc_costs(self.cost_fn) #Enumerate and cache successors and edge costs 
    
    self.learner_policy = None #This will be set prior to running a policy using set_learner_policy
        
    #Data structures for planning
    self.frontier = [] #Frontier is un-sorted as it is sorted on demand (using heuristic)
    self.oracle_frontier = PriorityQueue() #Frontier sorted according to oracle (for mixing)
    self.visited = {} #Keep track of visited cells
    self.c_obs = []  #Keep track of collision checks done so far
    self.cost_so_far = defaultdict(lambda: np.inf) #Keep track of cost of path to the node
    self.came_from = {} #Keep track of parent during search

    self.learner = SupervisedRegressionNetwork(learner_params) #learner is a part of the environment
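
For reference, a hypothetical construction of this environment for the XY case. Only the 'x_lims' and 'y_lims' keys are inferred from the constructor above; the class name comes from Example #5, and env_params, cost_fn and learner_params stand in for objects defined elsewhere in the codebase.

lattice_params = {'x_lims': (0, 200), 'y_lims': (0, 200)}  #illustrative bounds
env = StateLatticePlannerEnv(env_params, "XY", lattice_params, cost_fn, learner_params)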
Example #2
  def __init__(self):
    """Planner takes as input a planning problem object and returns
      the path and generated states
    @param problem   - planning problem object with following fields
    """
    self.frontier = PriorityQueue() #Open list
    self.visited = {} #Keep track of visited cells
    self.c_obs = []  #Keep track of collision checks done so far
    self.cost_so_far = {} #Keep track of cost of path to the node
    self.came_from = {} #Keep track of parent during search

    super(Astar, self).__init__()
Example #3
class Astar(SearchBasedPlanner):
  def __init__(self):
    """Planner takes as input a planning problem object and returns
      the path and generated states
    @param problem   - planning problem object with following fields
    """
    self.frontier = PriorityQueue() #Open list
    self.visited = {} #Keep track of visited cells
    self.c_obs = []  #Keep track of collision checks done so far
    self.cost_so_far = {} #Keep track of cost of path to the node
    self.came_from = {} #Keep track of parent during search

    super(Astar, self).__init__()

  def plan(self, max_expansions = 100000):
    assert self.initialized == True, "Planner has not been initialized properly. Please call initialize or reset_problem function before plan function"
    start_t = time.time()
    self.came_from[self.start_node]= (None, None)
    self.cost_so_far[self.start_node] = 0.
    start_h_val = self.heuristic_weight*self.get_heuristic(self.start_node, self.goal_node)
    self.frontier.put(self.start_node, 0.0 + start_h_val, 0.0)

    curr_expansions = 0         #Number of expansions done
    num_rexpansions = 0
    found_goal = False
    path =[]
    path_cost = np.inf

    while not self.frontier.empty():
      #Check 1: Stop search if frontier gets too large
      if curr_expansions >= max_expansions:
        print("Max Expansions Done.")
        break
      #Check 2: Stop search if open list gets too large
      if self.frontier.size() > 500000:
        print("Timeout.")
        break
      
      #Step 1: Pop the best node from the frontier
      f, h, curr_node = self.frontier.get()
      if curr_node in self.visited:
        continue
      #Step 2: Add to visited
      self.visited[curr_node] = 1
      #Check 3: Stop search if goal found
      if curr_node == self.goal_node:  
        print "Found goal"
        found_goal = True
        break
      
      #Step 3: If search has not ended, add neighbors of current node to frontier
      neighbors, edge_costs, valid_edges, invalid_edges = self.get_successors(curr_node)

      #Step 4: Update c_obs with collision checks performed
      # self.c_obs.append(invalid_edges)
      g = self.cost_so_far[curr_node]
      
      for i, neighbor in enumerate(neighbors):
        new_g = g + edge_costs[i]
        if neighbor not in self.visited:
          if neighbor not in self.cost_so_far or new_g < self.cost_so_far[neighbor]:
            self.came_from[neighbor] = (curr_node, valid_edges[i])
            self.cost_so_far[neighbor] = new_g
            h_val = self.heuristic_weight*self.get_heuristic(neighbor, self.goal_node)
            f_val = new_g + h_val
            self.frontier.put(neighbor, f_val, new_g)
      
      #Step 5:increment number of expansions
      curr_expansions += 1

    if found_goal:
      path, path_cost = self.reconstruct_path(self.came_from, self.start_node, self.goal_node, self.cost_so_far)
    else:
      print('Found no solution, priority queue empty')
    plan_time = time.time() - start_t

    return path, path_cost, curr_expansions, plan_time, self.came_from, self.cost_so_far, self.c_obs
    
  
  def clear_planner(self):
    self.frontier.clear()
    self.visited = {}
    self.c_obs = []
    self.cost_so_far = {}
    self.came_from = {}
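
None of the examples define the PriorityQueue these planners use. Below is a minimal sketch consistent with the calls above, assuming put(node, p1, p2) orders primarily on p1 (the f-value) with p2 as a tie-breaker, and get() returns a (p1, p2, node) triple; the SaIL planner in Example #7 additionally needs get_idx and pop_task, which are omitted here. The real implementation in the codebase may differ.

import heapq

class PriorityQueue(object):
  """Minimal binary-heap frontier matching the interface used above."""
  def __init__(self):
    self.elements = []

  def put(self, item, priority_1, priority_2=0.0):
    heapq.heappush(self.elements, (priority_1, priority_2, item))

  def get(self):
    return heapq.heappop(self.elements)  #-> (priority_1, priority_2, item)

  def empty(self):
    return len(self.elements) == 0

  def size(self):
    return len(self.elements)

  def clear(self):
    self.elements = []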

    
Example #4
class ValueIteration(SearchBasedPlanner):
    def __init__(self):
        """Planner takes as input a planning problem object and returns
      the path and generated states
    @param problem   - planning problem object with following fields
    """
        self.frontier = PriorityQueue()  #Open list
        self.visited = {}  #Keep track of visited cells
        self.c_obs = []  #Keep track of collision checks done so far
        self.cost_so_far = {}  #Keep track of cost of path to the node
        self.came_from = {}  #Keep track of parent during search

        super(ValueIteration, self).__init__()

    def plan(self, max_expansions=100000):
        assert self.initialized == True, "Planner has not been initialized properly. Please call initialize or reset_problem function before plan function"
        start_t = time.time()

        self.came_from[self.goal_node] = (None, None)
        self.cost_so_far[self.goal_node] = 0.

        self.frontier.put(self.goal_node, 0.0)

        curr_expansions = 0  #Number of expansions done
        found_goal = False
        path = []
        path_cost = np.inf

        while not self.frontier.empty():
            #Check 1: Stop search if frontier gets too large
            if curr_expansions >= max_expansions:
                print("Max Expansions Done.")
                break
            #Check 2: Stop search if open list gets too large
            if self.frontier.size() > 500000:
                print("Timeout.")
                break

            #Step 1: Pop the best node from the frontier
            f, _, curr_node = self.frontier.get()

            if curr_node in self.visited:
                continue
            #Step 2: Add to visited
            self.visited[curr_node] = 1

            #Step 3: If search has not ended, add neighbors of current node to frontier. We call get_predecessors instead of get_successors
            neighbors, edge_costs, valid_edges, invalid_edges = self.get_predecessors(
                curr_node)

            g = self.cost_so_far[curr_node]

            for i, neighbor in enumerate(neighbors):
                new_g = g + edge_costs[i]
                if neighbor not in self.visited:
                    if neighbor not in self.cost_so_far or new_g < self.cost_so_far[
                            neighbor]:
                        self.came_from[neighbor] = (curr_node, valid_edges[i])
                        self.cost_so_far[neighbor] = new_g
                        f_val = new_g
                        self.frontier.put(neighbor, f_val)

            #Step 5:increment number of expansions
            curr_expansions += 1
        plan_time = time.time() - start_t

        return path, path_cost, curr_expansions, plan_time, self.came_from, self.cost_so_far, self.c_obs

    def clear_planner(self):
        self.frontier.clear()
        self.visited = {}
        self.c_obs = []
        self.cost_so_far = {}
        self.came_from = {}
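
Because this sweep starts at the goal and expands predecessors, cost_so_far ends up holding the optimal cost-to-go of every node it reaches. A usage sketch, assuming the initialize(problem) entry point shown in Example #7:

vi = ValueIteration()
vi.initialize(problem)  #problem: planning problem object per the docstring
_, _, _, _, _, cost_to_go, _ = vi.plan()
#cost_to_go[node] is the optimal cost from node to the goal; this is the kind
#of node-to-value table the SaIL examples below load as their oracle.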
Example #5
class StateLatticePlannerEnv(SearchBasedPlanner):
  def __init__(self, env_params, lattice_type, lattice_params, cost_fn, learner_params):
    
    self.env_params = env_params
    self.cost_fn = cost_fn
    self.lattice_type = lattice_type
    if lattice_type == "XY":
      self.lattice = XYAnalyticLattice(lattice_params)
      self.start_n = self.lattice.state_to_node((lattice_params['x_lims'][0], lattice_params['y_lims'][0]))
      self.goal_n = self.lattice.state_to_node((lattice_params['x_lims'][1]-1, lattice_params['y_lims'][1]-1))

    
    elif lattice_type == "XYH":
      self.lattice = XYHAnalyticLattice(lattice_params)
      self.start_n = self.lattice.state_to_node((lattice_params['x_lims'][0], lattice_params['y_lims'][0], 0))
      self.goal_n = self.lattice.state_to_node((lattice_params['x_lims'][1]-1, lattice_params['y_lims'][1]-1, 0))
    
    self.lattice.precalc_costs(self.cost_fn) #Enumerate and cache successors and edge costs 
    
    self.learner_policy = None #This will be set prior to running a policy using set_learner_policy
        
    #Data structures for planning
    self.frontier = [] #Frontier is un-sorted as it is sorted on demand (using heuristic)
    self.oracle_frontier = PriorityQueue() #Frontier sorted according to oracle (for mixing)
    self.visited = {} #Keep track of visited cells
    self.c_obs = []  #Keep track of collision checks done so far
    self.cost_so_far = defaultdict(lambda: np.inf) #Keep track of cost of path to the node
    self.came_from = {} #Keep track of parent during search

    self.learner = SupervisedRegressionNetwork(learner_params) #learner is a part of the environment

  def initialize(self, env_folder, oracle_folder, num_envs, file_start_num, phase='train', visualize=False):
    """Initialize everything"""
    self.env_folder = env_folder 
    self.oracle_folder = oracle_folder 
    self.num_envs = num_envs  
    self.phase = phase
    self.visualize = visualize
    self.curr_env_num = file_start_num - 1

      
  def set_mixing_param(self, beta):
    self.beta = beta
  
  def run_episode(self, k_tsteps=None, max_expansions=1000000):
    assert self.initialized == True, "Planner has not been initialized properly. Please call initialize or reset_problem function before plan function"
    
    start_t = time.time()
    data = [] #Dataset that will be filled during training   
    
    self.came_from[self.start_n]= (None, None)
    self.cost_so_far[self.start_n] = 0.  #For each node, this is the cost of the shortest path to the start

    
    self.num_invalid_predecessors[self.start_n] = 0
    self.num_invalid_siblings[self.start_n] = 0
    self.depth_so_far[self.start_n] = 0
    
    if self.phase == "train":
      start_h_val = self.curr_oracle[self.start_n]
      self.oracle_frontier.put(self.start_n, start_h_val)
    
    self.frontier.append(self.start_n) #This frontier is just a list

    curr_expansions = 0         #Number of expansions done
    num_rexpansions = 0
    found_goal = False
    path =[]
    path_cost = np.inf

    while len(self.frontier) > 0: 
      #Check 1: Stop search if frontier gets too large
      if curr_expansions >= max_expansions:
        print("Max Expansions Done.")
        break
      #Check 2: Stop search if open list gets too large
      if len(self.frontier) > 500000:
        print("Timeout.")
        break

      #################################################################################################
      #Step 1: With probability beta we select the oracle, and with probability (1-beta) the learner.
      #We also collect data if curr_expansions is one of the k timesteps
      if phase == "train":
        if curr_expansions in k_tsteps:
          rand_idx = np.random.randint(len(self.frontier))
          n = self.frontier[rand_idx]   #Choose a random action
          data.append((self.get_feature_vec(n), self.curr_oracle[n])) #Query oracle for Q-value of that action and append to dataset
        if np.random.random() <= self.beta:
          h, curr_node = self.oracle_frontier.get()
        else:
          curr_node = self.get_best_node()
      else:
        curr_node = self.get_best_node()
      #################################################################################################
      
      if curr_node in self.visited:
        continue
      
      #Step 3: Add to visited
      self.visited[curr_node] = 1

      #Check 3: Stop search if goal found
      if curr_node == self.goal_n:
        print("Found goal")
        found_goal = True
        break
      
      #Step 4: If search has not ended, add neighbors of current node to frontier
      neighbors, edge_costs, valid_edges, invalid_edges = self.get_successors(curr_node)
      
      #Update the features of the parent and current node
      n_invalid_edges = len(invalid_edges)
      self.num_invalid_grand_children[self.came_from[curr_node][0]] += n_invalid_edges
      self.num_invalid_children[curr_node] = n_invalid_edges

      #Step 5: Update c_obs with collision checks performed
      self.c_obs.append(invalid_edges)
      g = self.cost_so_far[curr_node]
      
      for i, neighbor in enumerate(neighbors):
        new_g = g + edge_costs[i]
        
        if neighbor not in self.visited:
          #Add neighbor to open only if it wasn't in open already (don't need duplicates) [Note: Only do this if ordering in the frontier doesn't matter]
          if neighbor not in self.cost_so_far:
            #Update the oracle frontier only during training (for mixing)
            if self.phase == "train":
              h_val = self.curr_oracle[neighbor]
              self.oracle_frontier.put(neighbor, h_val)
            self.frontier.append(neighbor)    
          #Keep track of cost of shortest path to neighbor and parent it came from (for features and reconstruct path)  
          if new_g < self.cost_so_far[neighbor]:
            self.came_from[neighbor] = (curr_node, valid_edges[i])
            self.cost_so_far[neighbor] = new_g

            #Update feature dicts
            self.learner.cost_so_far[neighbor] = new_g
            self.learner.num_invalid_predecessors[neighbor] = self.num_invalid_predecessors[curr_node] + n_invalid_edges
            self.learner.num_invalid_siblings[neighbor] = n_invalid_edges
            self.learner.depth_so_far[neighbor] = self.depth_so_far[curr_node] + 1
      #Step 6:increment number of expansions
      curr_expansions += 1

    if found_goal:
      path, path_cost = self.reconstruct_path(self.came_from, self.start_n, self.goal_n, self.cost_so_far)
    else:
      print('Found no solution, frontier empty')
    time_taken = time.time() - start_t
    return path, path_cost, curr_expansions, time_taken, self.came_from, self.cost_so_far, self.c_obs, data  #Run planner on current env and return the data seen

  def get_heuristic(self, node, goal):
    """Given a node and goal, calculate features and get heuristic value"""    
    return 0
  
  def get_best_node(self):
    """Evaluates all the nodes in the frontier and returns the best node"""
    return None
  
  def sample_world(self, mode='cycle'):
    self.curr_env_num = (self.curr_env_num+1)%self.num_envs
    file_path = os.path.join(os.path.abspath(self.env_folder), str(self.curr_env_num)+'.png')
    self.curr_env = self.initialize_env_from_file(file_path)

  def compute_oracle(self, mode='cycle'):
    file_path = os.path.join(os.path.abspath(self.oracle_folder), "oracle_"+str(self.curr_env_num)+'.p')
    self.curr_oracle = pickle.load(open(file_path, 'rb'))

  def initialize_env_from_file(self, file_path):
    env = Env2D()
    env.initialize(file_path, self.env_params)
    if self.visualize:
      env.initialize_plot(self.lattice.node_to_state(self.start_n), self.lattice.node_to_state(self.goal_n))
    self.initialized = True
    return env
  
  def clear_planner(self):
    self.frontier.clear()
    self.visited = {}
    self.c_obs = []
    self.cost_so_far = {}
    self.came_from = {}
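
A hypothetical training loop over environments using only the methods defined above; the folder paths, environment count, beta schedule and k_tsteps size are illustrative, and run_episode is assumed to return the collected data as its last element (as its trailing comment states).

env = StateLatticePlannerEnv(env_params, "XY", lattice_params, cost_fn, learner_params)
env.initialize('envs/', 'oracles/', num_envs=200, file_start_num=0, phase='train')
for i in range(num_iters):
  env.set_mixing_param(0.8 ** i)  #decay the oracle mixing over iterations
  env.sample_world()              #load the next environment
  env.compute_oracle()            #load its precomputed cost-to-go oracle
  k_tsteps = set(np.random.choice(1000, size=50, replace=False))  #timesteps at which to query the oracle
  path, path_cost, expansions, t, came_from, cost_so_far, c_obs, data = env.run_episode(k_tsteps=k_tsteps)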
Example #6
    def plan(self, max_expansions=100000):
        assert self.initialized == True, "Planner has not been initialized properly. Please call initialize or reset_problem function before plan function"
        start_t = time.time()
        frontier = PriorityQueue()  #Initialize open list
        visited = dict()  #Initialized closed(visited) set
        c_obs = []  #Initialize Cobs list

        came_from = {}  #Keep track of edges a node came from
        cost_so_far = dict(
        )  #Keep track of cost of shortest path from start to each node visited so far

        came_from[self.goal_node] = (None, None)
        cost_so_far[self.goal_node] = 0.

        goal_h_val = self.get_heuristic(self.goal_node, self.start_node,
                                        came_from, cost_so_far, list(visited),
                                        c_obs)
        frontier.put(self.goal_node, 0 + self.heuristic_weight * goal_h_val,
                     self.heuristic_weight * goal_h_val)

        curr_expansions = 0  #Number of expansions done
        num_rexpansions = 0

        found_goal = False

        path = []
        path_cost = np.inf
        while not frontier.empty():
            #Check 1: Stop search if frontier gets too large
            if curr_expansions >= max_expansions:
                print("Max Expansions Done.")
                break
            #Check 2: Stop search if open list gets too large
            if frontier.size() > 500000:
                print("Timeout.")
                break

            #Step 1: Pop the best node from the frontier
            f, h, curr_node = frontier.get()

            if curr_node in visited:
                continue
            #Step 2: Add to visited
            visited[curr_node] = 1

            #Check 3: Stop search if start found
            if curr_node == self.start_node:
                print("Found goal")
                found_goal = True
                break

            #Step 3: If search has not ended, add neighbors of current node to frontier. We call get_predecessors instead of get_successors
            neighbors, edge_costs, valid_edges, invalid_edges = self.get_predecessors(
                curr_node)

            #Step 4: Update c_obs with collision checks performed
            c_obs.append(invalid_edges)
            g = cost_so_far[curr_node]

            for i, neighbor in enumerate(neighbors):
                new_g = g + edge_costs[i]
                if neighbor not in visited:
                    if neighbor not in cost_so_far or new_g < cost_so_far[
                            neighbor]:
                        came_from[neighbor] = (curr_node, valid_edges[i])
                        cost_so_far[neighbor] = new_g
                        h_val = self.heuristic_weight * self.get_heuristic(
                            neighbor, self.start_node)
                        f_val = new_g + h_val
                        frontier.put(neighbor, f_val, h_val)

            #Step 5:increment number of expansions
            curr_expansions += 1

        if found_goal:
            path, path_cost = self.reconstruct_path(came_from, self.goal_node,
                                                    self.start_node,
                                                    cost_so_far)
        else:
            print('Found no solution, priority queue empty')
        plan_time = time.time() - start_t
        return path, path_cost, curr_expansions, plan_time, came_from, cost_so_far, c_obs
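
The planners above call reconstruct_path (presumably provided by SearchBasedPlanner) but never define it. A minimal sketch consistent with how it is called here, walking came_from from the terminal node back to the root node (start to goal for forward A*, goal to start for this backward variant):

def reconstruct_path(came_from, root_node, terminal_node, cost_so_far):
    """Follow parent pointers from terminal_node back to root_node and return
    the recovered edge sequence along with the cost of that path."""
    path = []
    curr = terminal_node
    while curr != root_node:
        parent, edge = came_from[curr]
        path.append(edge)
        curr = parent
    path.reverse()
    return path, cost_so_far[terminal_node]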
Example #7
class SaILPlanner(SearchBasedPlanner):
    def __init__(self):
        """Planner takes as input a planning problem object and returns
      the path and generated states
    @param problem   - planning problem object with following fields
    """
        self.frontier = PriorityQueue()  #Open list
        self.f_o = PriorityQueue()  #Frontier sorted according to oracle
        self.visited = {}  #Keep track of visited cells
        self.c_obs = set()  #Keep track of collision checks done so far
        self.cost_so_far = {}  #Keep track of cost of path to the node
        self.came_from = {}  #Keep track of parent during search
        self.depth_so_far = {}  #Keep track of depth in tree

        super(SaILPlanner, self).__init__()

    def initialize(self, problem):
        super(SaILPlanner, self).initialize(problem)

        self.euch = EuclideanHeuristicNoAng()
        self.manh = ManhattanHeuristicNoAng()
        if self.lattice.ndims == 3:
            self.dubh = DubinsHeuristic(self.lattice.radius)
        self.norm_terms = dict()
        min_st = np.array((self.lattice.x_lims[0], self.lattice.y_lims[0]))
        max_st = np.array((self.lattice.x_lims[1], self.lattice.y_lims[1]))
        self.norm_terms['euc'] = self.euch.get_heuristic(min_st, max_st)
        self.norm_terms['manhattan'] = self.manh.get_heuristic(min_st, max_st)
        print('Planner Initialized')

    def get_features(self, node):
        """Calculates features for the node give the current state of the search"""

        s = self.lattice.node_to_state(node)
        goal_s = self.lattice.node_to_state(self.goal_node)
        start_s = self.lattice.node_to_state(self.start_node)
        #calculate search based features
        features = list(s)
        features.append(self.cost_so_far[node])
        # features.append(self.euch.get_heuristic(s, start_s)/self.norm_terms['euc'])       #normalized euclidean distance to start
        features.append(
            self.euch.get_heuristic(s, goal_s)
        )  #/self.norm_terms['euc'] )       #normalized euclidean distance to goal
        # features.append(self.manh.get_heuristic(s, start_s)/self.norm_terms['manhattan']) #normalized manhattan distance to start
        features.append(
            self.manh.get_heuristic(s, goal_s)
        )  #/self.norm_terms['manhattan'])  #normalized manhattan distance to goal

        if self.lattice.ndims == 3:
            features.append(self.dubh.get_heuristic(
                s, start_s))  #normalized dubins distance to start
            features.append(self.dubh.get_heuristic(
                s, goal_s))  #normalized dubins distance to goal
            features.append(s[-1])  #heading of the state

        features.append(self.depth_so_far[node])
        features += list(goal_s)

        #calculate environment based features
        #if distance transform available, use that
        if self.env.distance_transform_available:
            d_obs, obs_dx, obs_dy = self.env.get_obstacle_distance(s)
            features += [d_obs, atan2(obs_dy, obs_dx) / pi]
            feature_arr = np.asarray(features, dtype=np.float32)
            feature_arr = (feature_arr - min(feature_arr)) / (
                max(feature_arr) - min(feature_arr))  #normalize

        else:  #work off of c_obs only
            if len(self.c_obs):
                closest_obs = []
                closest_obs_x = []
                closest_obs_y = []
                d_cobs_min = np.inf
                d_x_min = np.inf
                d_y_min = np.inf

                for obs_config in self.c_obs:
                    d = self.euch.get_heuristic(s, obs_config)
                    d_x, d_y = list(np.abs(s[0:2] - obs_config[0:2]))

                    if d < d_cobs_min:
                        d_cobs_min = d
                        closest_obs = obs_config
                    if d_x < d_x_min:
                        d_x_min = d_x
                        closest_obs_x = obs_config
                    if d_y < d_y_min:
                        d_y_min = d_y
                        closest_obs_y = obs_config

                features += [d_cobs_min] + list(closest_obs) + [
                    d_x_min
                ] + list(closest_obs_x) + [d_y_min] + list(closest_obs_y)
                #Normalize features here
                feature_arr = np.asarray(features, dtype=np.float32)
                feature_arr = (feature_arr - min(feature_arr)) / (
                    max(feature_arr) - min(feature_arr))  #normalize
            else:
                feature_arr = np.array(features, dtype=np.float32)
                feature_arr = (feature_arr - min(feature_arr)) / (
                    max(feature_arr) - min(feature_arr))  #normalize
                env_features = np.array(
                    [-1] * (3 + (self.lattice.ndims * 3)),
                    dtype=np.float32)  #-1 for all obstacles
                feature_arr = np.concatenate((feature_arr, env_features))

        return feature_arr

    def get_heuristic(self, node1, node2):
        """Requires a heuristic function that goes works off of feature"""
        if self.heuristic is None:
            return 0
        ftrs = self.get_features(node1)
        # s_2 = self.lattice.node_to_state(node2)
        h_val = self.heuristic.get_heuristic(ftrs)
        return h_val

    def clear_planner(self):
        self.frontier.clear()
        self.f_o.clear()
        self.visited.clear()
        self.c_obs.clear()
        self.cost_so_far.clear()
        self.came_from.clear()

    def collect_data(self, rand_idx, oracle):
        _, _, rand_node = self.frontier.get_idx(rand_idx)
        #we get features for that action
        rand_f = self.get_features(rand_node)
        #we query oracle for label
        y = oracle.get_optimal_q(rand_node)
        return rand_f, y

    def policy(self):
        h, _, curr_node = self.frontier.get()
        return h, curr_node

    def policy_mix(self, beta):
        """Implements the mixture policy"""
        if np.random.sample() < beta:
            h, _, curr_node = self.f_o.get()
            _, _, _ = self.frontier.pop_task(curr_node)
        else:
            h, _, curr_node = self.frontier.get()
            _, _, _ = self.f_o.pop_task(curr_node)

        return h, curr_node
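
A sketch of one training-time expansion step using the pieces above; planner, oracle and beta are assumed to exist (a SaILPlanner initialized as in this example, an oracle object exposing get_optimal_q, and the current mixing coefficient).

h, curr_node = planner.policy_mix(beta)  #mixture of oracle and learner orderings
if planner.frontier.size() > 0:
    rand_idx = np.random.randint(planner.frontier.size())
    features, q_label = planner.collect_data(rand_idx, oracle)  #one (feature, Q-value) training pair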