Example 1
def _maybe_add_specific(self, preconds, value, value_threshold,
                        specific_frontier, above_threshold):
    # Assumes heapq is imported at module level.
    if value > value_threshold:
        # Add preconds only if it does not subsume an existing entry, and
        # drop existing entries that subsume it, so above_threshold stays
        # free of mutually redundant preconditions.
        if not any(
                state_subsumes(preconds, fs) for fs, v in above_threshold):
            for fs, v in set(above_threshold):
                if state_subsumes(fs, preconds):
                    above_threshold.remove((fs, v))
            above_threshold.add((preconds, value))
        # Push onto a heap keyed by total fluent count, so smaller (more
        # general) preconditions are expanded first.
        heapq.heappush(specific_frontier,
                       (len(preconds[0]) + len(preconds[1]), preconds))
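Throughout these examples a state is a pair of frozensets (positive fluents, negative fluents), and preconditions use the same representation. The helper state_subsumes is not shown in the source; a minimal sketch consistent with how it is called here (first argument the more specific state) might be:

def state_subsumes(specific, general):
    # Assumed semantics: `specific` subsumes `general` when every positive
    # and negative fluent required by `general` also holds in `specific`.
    return general[0] <= specific[0] and general[1] <= specific[1]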
Example 2
def _explore(self):
    done = False
    hit_backward_state = False

    all_states = []
    s = self.env.last_observation()

    # This needs to be stored globally.
    while not hit_backward_state:
        # Act with the exploration policy and record the transition.
        a = self.policy.action(s)
        all_states.append((s, a))
        sp, reward, done, info = self.executor.execute_core_action(a)
        self.brain.performance.cumulative_reward += reward

        # Represent the new state as a (positive, negative) fluent pair.
        pos_state = frozenset(self.detector.interpret(sp))
        fs = pos_state, frozenset(self.brain.wm.task.facts - pos_state)

        # If a cached plan's generalized precondition covers this state,
        # adopt that plan and stop exploring.
        for ps in self.brain.ltm.cached_plans.keys():
            if state_subsumes(fs, ps):
                self.brain.wm.current_plan = self.brain.ltm.cached_plans[ps]
                hit_backward_state = True
                break

        # Otherwise try planning from here, unless this state is already
        # known to be unplannable.
        if not hit_backward_state and \
                fs not in self.brain.ltm.unplannable_states:
            search = ForwardSearch()
            task = self.brain.wm.task.copy_with_new_initial_state(pos_state)
            plan, frontier, visited = search.search(
                task, set(self.brain.ltm.unplannable_states))
            if plan:
                # Regress the goal backwards through the plan, caching each
                # generalized intermediate state with its plan suffix and
                # spawning an operator learner for it.
                generalized_fs = task.goals
                for i in range(1, len(plan) + 1):
                    generalized_fs = plan[-i].regress(generalized_fs)
                    if generalized_fs not in self.brain.ltm.cached_plans:
                        self.brain.ltm.cached_plans[generalized_fs] = plan[
                            -i:]
                        learner = TabularOperatorLearner(
                            self.env, self.brain.motor.state_hasher.hash,
                            self.executor,
                            self._get_operator(generalized_fs),
                            self.detector)
                        self.learners.append(learner)
                        # New learners must train on this first transition
                        # directly; afterwards they only see transitions
                        # routed through the executor.
                        learner.train(s, a, reward, sp)
                        self.learners[0].add_learner_and_revise_reward(
                            learner, s, a, reward, sp)
                        self.executor.attach_learner(learner)
                hit_backward_state = True
                self.brain.wm.current_plan = self.brain.ltm.cached_plans[
                    generalized_fs]
            else:
                self.brain.ltm.unplannable_states = visited

        s = sp
        if done:
            self.brain.wm.current_plan = []
            break
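The call plan[-i].regress(generalized_fs) is what produces the generalized cached states. The operator class is not shown; assuming STRIPS-style operators with precondition and effect sets (the attribute names below are hypothetical), regression of a (pos, neg) goal pair might look roughly like:

def regress(self, goals):
    # Sketch only: goal fluents achieved by this operator's effects are
    # replaced by its preconditions; all other goal fluents must already
    # hold before the operator is applied.
    pos_goals, neg_goals = goals
    new_pos = (pos_goals - self.add_effects) | self.pos_preconditions
    new_neg = (neg_goals - self.del_effects) | self.neg_preconditions
    return frozenset(new_pos), frozenset(new_neg)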
Example 3
def _maybe_add_general(self, preconds, value, value_threshold,
                       general_frontier, above_threshold):
    if value > value_threshold:
        # Evict stored preconditions that subsume this one, then record it.
        for fs, v in set(above_threshold):
            if state_subsumes(fs, preconds):
                above_threshold.remove((fs, v))
        above_threshold.add((preconds, value))
    else:
        # Below threshold: keep specializing, so queue it for expansion.
        general_frontier.insert(0, preconds)
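Note that general_frontier is a plain list used as a FIFO queue: insert(0, ...) enqueues here, and _expand_general_node (Example 7) dequeues with pop(), so every enqueue is O(n). A collections.deque would preserve the same order in O(1):

from collections import deque

general_frontier = deque()
general_frontier.appendleft(preconds)  # replaces general_frontier.insert(0, preconds)
node = general_frontier.pop()          # dequeue from the opposite end, as before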
Example 4
def _specify(self, fs, unique_fs):
    # Intersect the positive and negative fluent sets of every observed
    # state that subsumes fs; the result is what those states share.
    common_knowledge = None
    for pos, neg in unique_fs:
        if not state_subsumes((pos, neg), fs):
            continue
        if common_knowledge is None:
            common_knowledge = pos, neg
        else:
            common_knowledge = (common_knowledge[0] & pos,
                                common_knowledge[1] & neg)
    return common_knowledge
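A tiny worked example of what _specify returns, with hypothetical fluent names: both observed states below subsume fs, so the result is their componentwise intersection.

fs = (frozenset({"a"}), frozenset({"c"}))
unique_fs = [(frozenset({"a", "b"}), frozenset({"c"})),
             (frozenset({"a"}), frozenset({"c", "d"}))]
# _specify(fs, unique_fs) -> (frozenset({'a'}), frozenset({'c'}))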
Example 5
def get_avg_for_precondition(self, unique_fs_values, num_states, precond):
    # Average the values of all observed states matching precond,
    # weighted by how often each state was visited.
    relevant_fs = [
        specific_fs for specific_fs in unique_fs_values
        if state_subsumes(specific_fs, precond)
    ]
    if not relevant_fs:
        return 0.
    num = 0.
    denom = 0.
    for fs in relevant_fs:
        num += unique_fs_values[fs] * num_states[fs]
        denom += num_states[fs]
    return num / denom
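As a quick sanity check (fs1 and fs2 are hypothetical states): a state valued 1.0 that was visited 3 times and a state valued 0.5 visited once yield a visit-weighted mean, not a plain average of the two values.

unique_fs_values = {fs1: 1.0, fs2: 0.5}
num_states = {fs1: 3, fs2: 1}
# get_avg_for_precondition(...) -> (1.0 * 3 + 0.5 * 1) / (3 + 1) = 0.875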
Example 6
def _run(self):
    # TODO implement plan caching and unplannability caching in the
    #  planning mode so we don't have to replan every time.
    if self.planner == "bfs":
        forward_search = ForwardSearch()
        # Build the planning task from the currently perceived state.
        pos_init = frozenset(self.brain.motor.perceive())
        task = self.brain.wm.task.copy_with_new_initial_state(pos_init)
        full_init = task.initial_state
        if full_init in self.brain.ltm.unplannable_states:
            log.info(
                "We know this state is unplannable. Skip right to "
                "exploration mode.")
            self.brain.affect.planning_impasse = True
            return
        # Reuse any cached plan whose generalized precondition covers the
        # current state.
        subsumes = list(
            filter(lambda fs: state_subsumes(full_init, fs),
                   self.brain.ltm.cached_plans.keys()))
        if subsumes:
            log.info(
                "We know this state is plannable! Don't bother planning"
                " -- we already know this.")
            self.plan_exists = True
            self.brain.wm.current_plan = self.brain.ltm.cached_plans[
                subsumes[0]]
            return
        log.info("Goal: {}".format(task.goals))
        log.info("Searching for plan...")
        # frontier: the end of the road in this particular search
        # visited: all states visited
        plan, frontier, visited = forward_search.search(task)
        self.brain.wm.forward_states = visited
        log.info("Plan: {}".format(plan))
        self.brain.wm.clear_operator_stack()
        if plan:
            self.plan_exists = True
            self.brain.wm.current_plan = plan
        else:
            self.brain.affect.planning_impasse = True
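subsumes[0] simply takes whichever matching cached precondition was inserted first; when several cached plans apply, one possible refinement (purely hypothetical, not in the source) is to prefer the most general match, i.e. the one with the fewest fluents:

# Hypothetical refinement: prefer the most general cached precondition.
best = min(subsumes, key=lambda fs: len(fs[0]) + len(fs[1]))
self.brain.wm.current_plan = self.brain.ltm.cached_plans[best]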
Example 7
def _expand_general_node(self, V, general_frontier, applicable_states,
                         value_threshold, specific_fs, specific_fs_values,
                         num_states, all_fluents, above_threshold):
    pos, neg = general_frontier.pop()
    # Specialize the popped precondition by one fluent at a time, either
    # positively or negatively, then ground each candidate against the
    # observed states via _specify.
    unused_fluents = set(all_fluents) - pos - neg
    new_pos_conditions = set(
        self._specify((frozenset(pos | {pos_add}), neg), specific_fs)
        for pos_add in unused_fluents)
    new_neg_conditions = set(
        self._specify((pos, frozenset(neg | {neg_add})), specific_fs)
        for neg_add in unused_fluents)
    # _specify returns None when no observed state matches a candidate;
    # drop those before scoring.
    new_conditions = {
        c for c in new_pos_conditions | new_neg_conditions if c is not None
    }
    for new_condition in new_conditions:
        # Exclude anything that doesn't apply in at least one of the
        # applicable states, and skip already-scored preconditions.
        if new_condition not in self._precondition_values and \
                any(state_subsumes(app_state, new_condition)
                    for app_state in applicable_states):
            value = self.get_avg_for_precondition(specific_fs_values,
                                                  num_states, new_condition)
            self._precondition_values[new_condition] = value
            self._maybe_add_general(new_condition, value, value_threshold,
                                    general_frontier, above_threshold)
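Putting Examples 3 and 7 together, the enclosing search presumably runs general-to-specific until the frontier empties. A sketch of such a driver (the name _search_general and the None passed for the unused V parameter are assumptions; only the two methods above are from the source):

def _search_general(self, start, value_threshold, applicable_states,
                    specific_fs, specific_fs_values, num_states,
                    all_fluents):
    # Expand preconditions in FIFO order; _maybe_add_general re-enqueues
    # below-threshold nodes and _expand_general_node skips already-scored
    # ones, so the loop terminates once no new specializations remain.
    general_frontier = [start]
    above_threshold = set()
    while general_frontier:
        self._expand_general_node(None, general_frontier, applicable_states,
                                  value_threshold, specific_fs,
                                  specific_fs_values, num_states,
                                  all_fluents, above_threshold)
    return above_threshold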