class OnlineDFSAgent(LocalSearch): def __init__(self, problem, pts_function): super().__init__(problem, pts_function) def init(self): self.alive = True self.s = None self.a = None self.untried = {} self.unbacktracked = {} self.result = {} # function ONLINE-DFS-AGENT(s') returns an action # inputs: s', a percept that identifies the current state def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in untried) then untried[s'] <- ACTIONS(s') if s_prime not in self.untried.keys(): self.untried[s_prime] = self._actions(s_prime) # if s is not null then do if self.s != None: # Note: If I've already seen the result of this # [s, a] then don't put it back on the unbacktracked # list otherwise you can keep oscillating # between the same states endlessly. if s_prime != self.result.get((self.s, self.a)): self.result[(self.s, self.a)] = s_prime lst = self.unbacktracked.setdefault(s_prime, []) lst.insert(0, self.s) # if untried[s'] is empty then if len(self.untried.get(s_prime)) == 0: if len(self.unbacktracked.get(s_prime)) == 0: self.a = NoOpAction() else: # else a <- an action b such that result[s', b] = POP(unbacktracked[s']) popped = self.unbacktracked[s_prime].pop(0) for (s, b) in self.result.keys(): if s == s_prime and self.result[(s, b)] == popped: self.a = b break else: # else a <- POP(untried[s']) self.a = self.untried[s_prime].pop(0) if self.a.is_noop(): self.alive = False # s <- s' self.s = s_prime # return a return self.a
def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in untried) then untried[s'] <- ACTIONS(s') if s_prime not in self.untried.keys(): self.untried[s_prime] = self._actions(s_prime) # if s is not null then do if self.s != None: # Note: If I've already seen the result of this # [s, a] then don't put it back on the unbacktracked # list otherwise you can keep oscillating # between the same states endlessly. if s_prime != self.result.get((self.s, self.a)): self.result[(self.s, self.a)] = s_prime lst = self.unbacktracked.setdefault(s_prime, []) lst.insert(0, self.s) # if untried[s'] is empty then if len(self.untried.get(s_prime)) == 0: if len(self.unbacktracked.get(s_prime)) == 0: self.a = NoOpAction() else: # else a <- an action b such that result[s', b] = POP(unbacktracked[s']) popped = self.unbacktracked[s_prime].pop(0) for (s, b) in self.result.keys(): if s == s_prime and self.result[(s, b)] == popped: self.a = b break else: # else a <- POP(untried[s']) self.a = self.untried[s_prime].pop(0) if self.a.is_noop(): self.alive = False # s <- s' self.s = s_prime # return a return self.a
def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in H) then H[s'] <- h(s') if s_prime not in self.H.keys(): self.H[s_prime] = self.heuristic_function.h(s_prime) # if s is not null if self.s != None: # result[s, a] <- s' self.result[(self.s, self.a)] = s_prime # H[s] <- min LRTA*-COST(s, b, result[s, b], H) # b (element of) ACTIONS(s) m = min([ self._lrta_cost(self.s, b) for b in self._actions(self.s) ]) self.H[self.s] = m # a <- an action b in ACTIONS(s') that minimizes LRTA*-COST(s', b, # result[s', b], H) m = PlusInfinity() self.a = NoOpAction() for b in self._actions(s_prime): cost = self._lrta_cost(s_prime, b) if cost < m: m = cost self.a = b # s <- s' self.s = s_prime if self.a.is_noop(): self.alive = False # return a return self.a
def actions_from_nodes(node_list): if len(node_list) == 1: return [NoOpAction()] else: actions = [] for i in range(1, len(node_list)): node = node_list[i] actions.append(node.get_action()) return actions
def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in H) then H[s'] <- h(s') if s_prime not in self.H.keys(): self.H[s_prime] = self.heuristic_function.h(s_prime) # if s is not null if self.s != None: # result[s, a] <- s' self.result[(self.s, self.a)] = s_prime # H[s] <- min LRTA*-COST(s, b, result[s, b], H) # b (element of) ACTIONS(s) m = min([self._lrta_cost(self.s, b) for b in self._actions(self.s)]) self.H[self.s] = m # a <- an action b in ACTIONS(s') that minimizes LRTA*-COST(s', b, # result[s', b], H) m = PlusInfinity() self.a = NoOpAction() for b in self._actions(s_prime): cost = self._lrta_cost(s_prime, b) if cost < m: m = cost self.a = b # s <- s' self.s = s_prime if self.a.is_noop(): self.alive = False # return a return self.a
def test_already_at_goal(self): me = MapEnvironment(self.map) agent = OnlineDFSAgent( OnlineSearchProblem(MapActionFunction(self.map), MapGoalTestFunction("A"), MapStepCostFunction(self.map)), MapPerceptToStateFunction()) me.add_new_agent(agent, "A") expected_actions = [NoOpAction()] me.add_environment_view(EnvironmentViewMock(expected_actions)) me.step_until_done()
def test_search_start_at_goal_state(self): finish = RomaniaCities.BUCHAREST start = RomaniaCities.BUCHAREST rm = get_simplified_road_map_of_part_of_romania() rbfs = RecursiveBestFirstSearch( AStarEvaluationFunction(MapHeuristicFunction(rm, finish))) p = Problem(start, MapActionFunction(rm), MapResultFunction(), MapGoalTestFunction(finish), MapStepCostFunction(rm)) result = rbfs.search(p) self.assertFalse(rbfs.is_failure(result)) self.assertEqual(1, len(result)) self.assertEqual(NoOpAction(), result[0])
def test_search_start_at_goal_state(self): finish = RomaniaCities.BUCHAREST start = RomaniaCities.BUCHAREST rm = get_simplified_road_map_of_part_of_romania() ts = TreeSearch() ass = AStarSearch(ts, MapHeuristicFunction(rm, finish)) p = Problem(start, MapActionFunction(rm), MapResultFunction(), MapGoalTestFunction(finish), MapStepCostFunction(rm)) result = ass.search(p) self.assertFalse(ass.is_failure(result)) self.assertEqual(1, len(result)) self.assertEqual(NoOpAction(), result[0])
class LRTAStarAgent(LocalSearch): def __init__(self, problem, pts_function, heuristic_function): super().__init__(problem, pts_function) self.heuristic_function = heuristic_function def init(self): self.alive = True self.result = {} self.H = {} self.s = None self.a = None # function LRTA*-AGENT(s') returns an action # inputs: s', a percept that identifies the current state def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in H) then H[s'] <- h(s') if s_prime not in self.H.keys(): self.H[s_prime] = self.heuristic_function.h(s_prime) # if s is not null if self.s != None: # result[s, a] <- s' self.result[(self.s, self.a)] = s_prime # H[s] <- min LRTA*-COST(s, b, result[s, b], H) # b (element of) ACTIONS(s) m = min([self._lrta_cost(self.s, b) for b in self._actions(self.s)]) self.H[self.s] = m # a <- an action b in ACTIONS(s') that minimizes LRTA*-COST(s', b, # result[s', b], H) m = PlusInfinity() self.a = NoOpAction() for b in self._actions(s_prime): cost = self._lrta_cost(s_prime, b) if cost < m: m = cost self.a = b # s <- s' self.s = s_prime if self.a.is_noop(): self.alive = False # return a return self.a # function LRTA*-COST(s, a, s', H) returns a cost estimate def _lrta_cost(self, s, action): s_prime = self.result.get((s, action)) # if s' is undefined then return h(s) if s_prime == None: return self.heuristic_function.h(s) # else return c(s, a, s') + H[s'] return self.problem.step_cost_function.c(s, action, s_prime) + self.H[s_prime]
def get_move_to_actions_array(locations): return [MoveToAction(location) for location in locations] + [NoOpAction()]
class LRTAStarAgent(LocalSearch): def __init__(self, problem, pts_function, heuristic_function): super().__init__(problem, pts_function) self.heuristic_function = heuristic_function def init(self): self.alive = True self.result = {} self.H = {} self.s = None self.a = None # function LRTA*-AGENT(s') returns an action # inputs: s', a percept that identifies the current state def execute(self, percept): s_prime = self.pts_function.get_state(percept) # if GOAL-TEST(s') then return stop if self._is_goal_state(s_prime): self.a = NoOpAction() else: # if s' is a new state (not in H) then H[s'] <- h(s') if s_prime not in self.H.keys(): self.H[s_prime] = self.heuristic_function.h(s_prime) # if s is not null if self.s != None: # result[s, a] <- s' self.result[(self.s, self.a)] = s_prime # H[s] <- min LRTA*-COST(s, b, result[s, b], H) # b (element of) ACTIONS(s) m = min([ self._lrta_cost(self.s, b) for b in self._actions(self.s) ]) self.H[self.s] = m # a <- an action b in ACTIONS(s') that minimizes LRTA*-COST(s', b, # result[s', b], H) m = PlusInfinity() self.a = NoOpAction() for b in self._actions(s_prime): cost = self._lrta_cost(s_prime, b) if cost < m: m = cost self.a = b # s <- s' self.s = s_prime if self.a.is_noop(): self.alive = False # return a return self.a # function LRTA*-COST(s, a, s', H) returns a cost estimate def _lrta_cost(self, s, action): s_prime = self.result.get((s, action)) # if s' is undefined then return h(s) if s_prime == None: return self.heuristic_function.h(s) # else return c(s, a, s') + H[s'] return self.problem.step_cost_function.c(s, action, s_prime) + self.H[s_prime]