def _maybe_add_specific(self, preconds, value, value_threshold, specific_frontier, above_threshold): if value > value_threshold: if not any( state_subsumes(preconds, fs) for fs, v in above_threshold): for fs, v in set(above_threshold): if state_subsumes(fs, preconds): above_threshold.remove((fs, v)) above_threshold.add((preconds, value)) # specific_frontier.insert(0, preconds) heapq.heappush(specific_frontier, (len(preconds[0]) + len(preconds[1]), preconds))
def _explore(self):
    """Roll out the exploration policy until reaching a state from which the
    goal is known (cached plan) or newly found to be plannable.

    For each step: act, perceive the resulting state, and check whether any
    cached plan's precondition covers it. If not (and the state is not known
    to be unplannable), run a forward search; on success, regress the goal
    through the plan to cache generalized preconditions and spin up a learner
    for each newly cached suffix.

    Side effects: mutates working memory (current plan), long-term memory
    (cached plans, unplannable states), learners, and cumulative reward.
    """
    done = False
    hit_backward_state = False
    all_states = []
    s = self.env.last_observation()  # This needs to be stored globally.
    while not hit_backward_state:
        a = self.policy.action(s)
        all_states.append((s, a))
        sp, reward, done, info = self.executor.execute_core_action(a)
        self.brain.performance.cumulative_reward += reward
        pos_state = frozenset(self.detector.interpret(sp))
        # Full state: (true fluents, false fluents) relative to task facts.
        fs = pos_state, frozenset(self.brain.wm.task.facts - pos_state)
        # A cached plan applies if its (generalized) precondition covers fs.
        for ps in self.brain.ltm.cached_plans.keys():
            if state_subsumes(fs, ps):
                self.brain.wm.current_plan = self.brain.ltm.cached_plans[ps]
                hit_backward_state = True
                break
        if not hit_backward_state and fs not in self.brain.ltm.unplannable_states:
            search = ForwardSearch()
            task = self.brain.wm.task.copy_with_new_initial_state(pos_state)
            plan, frontier, visited = search.search(
                task, set(self.brain.ltm.unplannable_states))
            if plan:
                # Regress the goal backwards through the plan; each prefix of
                # regressions yields a generalized precondition for the
                # corresponding plan suffix.
                generalized_fs = task.goals
                for i in range(1, len(plan) + 1):
                    generalized_fs = plan[-i].regress(generalized_fs)
                    if generalized_fs not in self.brain.ltm.cached_plans:
                        self.brain.ltm.cached_plans[generalized_fs] = plan[-i:]
                        learner = TabularOperatorLearner(
                            self.env, self.brain.motor.state_hasher.hash,
                            self.executor, self._get_operator(generalized_fs),
                            self.detector)
                        self.learners.append(learner)
                        # New learners need to learn from that first sweet
                        # timestep; otherwise they'll learn from the executor.
                        learner.train(s, a, reward, sp)
                        self.learners[0].add_learner_and_revise_reward(
                            learner, s, a, reward, sp)
                        self.executor.attach_learner(learner)
                hit_backward_state = True
                # generalized_fs is now the fully regressed precondition of
                # the whole plan.
                self.brain.wm.current_plan = self.brain.ltm.cached_plans[
                    generalized_fs]
            else:
                # NOTE(review): this replaces (rather than unions with) the
                # previous unplannable set — confirm that `visited` already
                # contains the states passed in as excluded.
                self.brain.ltm.unplannable_states = visited
        s = sp
        if done:
            self.brain.wm.current_plan = []
            # self.env.reset()
            break
def _maybe_add_general(self, preconds, value, value_threshold, general_frontier, above_threshold): if value > value_threshold: for fs, v in set(above_threshold): if state_subsumes(fs, preconds): above_threshold.remove((fs, v)) above_threshold.add((preconds, value)) else: general_frontier.insert(0, preconds)
def _specify(self, fs, unique_fs): common_knowledge = None for pos, neg in unique_fs: if not state_subsumes((pos, neg), fs): continue if common_knowledge is None: common_knowledge = pos, neg else: common_knowledge = common_knowledge[0] & pos, common_knowledge[ 1] & neg return common_knowledge
def get_avg_for_precondition(self, unique_fs_values, num_states, precond):
    """Return the visit-weighted average value of all specific states covered
    by *precond*.

    Args:
        unique_fs_values: mapping from a specific full state to its value.
        num_states: mapping from a specific full state to its visit count
            (used as the averaging weight).
        precond: a (positive-fluents, negative-fluents) precondition.

    Returns:
        ``sum(value * count) / sum(count)`` over every state in
        ``unique_fs_values`` subsumed by ``precond``; ``0.0`` when no state
        is covered.
    """
    relevant_fs = [
        specific_fs for specific_fs in unique_fs_values
        if state_subsumes(specific_fs, precond)
    ]
    if not relevant_fs:
        return 0.
    num = 0.
    denom = 0.
    for fs in relevant_fs:
        num += unique_fs_values[fs] * num_states[fs]
        denom += num_states[fs]
    return num / denom
def _run(self):
    """Plan from the currently perceived state, consulting long-term memory
    caches before searching.

    Order of checks: (1) known-unplannable state -> set planning impasse and
    bail; (2) a cached plan whose precondition covers the state -> reuse it;
    (3) otherwise run a forward search, caching the resulting plan or
    flagging an impasse.

    Side effects: mutates ``self.plan_exists``, working memory (current plan,
    forward states, operator stack), and affect (planning impasse).
    """
    # TODO implement plan caching and unplannability caching in the planning
    # mode so we don't have to replan every time.
    if self.planner == "bfs":
        # bfs = BFS()
        forward_search = ForwardSearch()
        # task = self.brain.wm.task
        pos_init = frozenset(self.brain.motor.perceive())
        task = self.brain.wm.task.copy_with_new_initial_state(pos_init)
        full_init = task.initial_state
        if full_init in self.brain.ltm.unplannable_states:
            log.info(
                "We know this state is unplannable. Skip right to exploration mode."
            )
            self.brain.affect.planning_impasse = True
            return
        # Cached plans are keyed by generalized preconditions; any key that
        # the current state subsumes gives a reusable plan.
        subsumes = list(
            filter(lambda fs: state_subsumes(full_init, fs),
                   self.brain.ltm.cached_plans.keys()))
        if len(subsumes) > 0:
            log.info(
                "We know this state is plannable! Don't bother planning -- we already know this."
            )
            self.plan_exists = True
            self.brain.wm.current_plan = self.brain.ltm.cached_plans[
                subsumes[0]]
            return
        log.info("Goal: {}".format(task.goals))
        log.info("Searching for plan...")
        # frontier: the end of the road in this particular search
        # visited: all states visited
        plan, frontier, visited = forward_search.search(task)
        self.brain.wm.forward_states = visited
        log.info("Plan: {}".format(plan))
        # common, fluents = self._get_common_fluents(visited)
        self.brain.wm.clear_operator_stack()
        if plan:
            self.plan_exists = True
            self.brain.wm.current_plan = plan
        else:
            self.brain.affect.planning_impasse = True
def _expand_general_node(self, V, general_frontier, applicable_states,
                         value_threshold, specific_fs, specific_fs_values,
                         num_states, all_fluents, above_threshold):
    """Pop one precondition off the general frontier and expand it by one
    literal in every possible way.

    Each candidate adds one unused fluent to either the positive or the
    negative side, is tightened via ``_specify``, scored by its weighted
    average value, and handed to ``_maybe_add_general`` for retention or
    further specialization.

    Args:
        V: unused here; presumably a value table — confirm against callers.
        general_frontier: list of preconditions awaiting expansion (popped
            from the end).
        applicable_states: states a candidate must cover to stay considered.
        value_threshold: retention threshold passed to ``_maybe_add_general``.
        specific_fs: specific full states used by ``_specify``.
        specific_fs_values / num_states: value and visit-count tables for
            scoring candidates.
        all_fluents: the complete fluent vocabulary.
        above_threshold: mutable set of retained (preconds, value) pairs.

    Side effects: mutates ``self._precondition_values``, ``general_frontier``
    and ``above_threshold``.
    """
    pos, neg = general_frontier.pop()
    unused_fluents = set(all_fluents) - pos - neg
    # One-step specializations: add a single unused fluent to the positive
    # or negative side, then tighten with common knowledge from specific_fs.
    new_pos_conditions = set(
        self._specify((frozenset(pos | {pos_add}), neg), specific_fs)
        for pos_add in unused_fluents)
    new_neg_conditions = set(
        self._specify((pos, frozenset(neg | {neg_add})), specific_fs)
        for neg_add in unused_fluents)
    new_conditions = new_pos_conditions | new_neg_conditions
    for new_condition in new_conditions:
        # Exclude anything that doesn't apply in at least one of the
        # applicable states; skip candidates already scored.
        if new_condition not in self._precondition_values.keys() and \
                any(state_subsumes(app_state, new_condition)
                    for app_state in applicable_states):
            value = self.get_avg_for_precondition(specific_fs_values,
                                                  num_states, new_condition)
            self._precondition_values[new_condition] = value
            self._maybe_add_general(new_condition, value, value_threshold,
                                    general_frontier, above_threshold)