Example #1
    def expand(self, next_layer, count=1):
        """
            Expand the node by querying the oracle model for every possible action
        :param next_layer: list of nodes at the next depth, to be updated with new children nodes
        :param count: number of times each transition must be evaluated
        """
        if self.state is None:
            raise Exception("The state should be set before expanding a node")
        try:
            actions = self.state.get_available_actions()
        except AttributeError:
            # Fall back to enumerating the discrete action space if the
            # environment does not expose get_available_actions()
            actions = range(1, self.state.action_space.n)

        self.planner.openings += count

        if self.done and PlaTyPOOSNode.STOP_ON_ANY_TERMINAL_STATE:
            return

        for _ in range(count):
            for action in actions:
                state = safe_deepcopy_env(self.state)
                _, reward, done, _ = state.step(action)

                if action not in self.children:
                    self.children[action] = type(self)(self,
                                                       self.planner,
                                                       state,
                                                       depth=self.depth + 1)
                    next_layer.append(self.children[action])

                self.children[action].update(reward, done)
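
All of these examples copy the environment with safe_deepcopy_env before stepping it, so that simulated rollouts do not mutate the real environment. The helper itself is not shown here; the following is a minimal illustrative sketch, assuming a standard Gym-style environment and that the attribute names skipped below (viewer, video_recorder) are placeholders rather than the library's actual list.

import copy

def safe_deepcopy_env(env):
    # Illustrative sketch only: deep-copy an environment for simulation,
    # leaving out attributes that should not (or cannot) be copied.
    cls = env.__class__
    result = cls.__new__(cls)
    for k, v in env.__dict__.items():
        if k in ('viewer', 'video_recorder'):  # hypothetical non-copyable attributes
            setattr(result, k, None)
        else:
            setattr(result, k, copy.deepcopy(v))
    return result
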
Example #2
    def plan(self, state, observation):
        for i in range(self.config['iterations']):
            if (i + 1) % 10 == 0:
                logger.debug('{} / {}'.format(i + 1, self.config['iterations']))
            self.run(safe_deepcopy_env(state), observation)
        return self.get_plan()
Example #3
    def plan(self, observation):
        action_distribution = Normal(
            torch.zeros(self.config["horizon"], self.action_size),
            torch.ones(self.config["horizon"], self.action_size))
        for i in range(self.config["iterations"]):
            # Evaluate J action sequences from the current belief (in batch)
            actions = action_distribution.sample([self.config["candidates"]])  # Sample actions
            candidates = [
                safe_deepcopy_env(self.env)
                for _ in range(self.config["candidates"])
            ]
            returns = torch.zeros(self.config["candidates"])
            # Sample next states
            for t in range(self.config["horizon"]):
                for c, candidate in enumerate(candidates):
                    _, reward, _, _ = candidate.step(actions[c, t])
                    returns[c] += self.config["gamma"]**t * reward

            # Re-fit belief to the K best action sequences
            _, topk = returns.topk(self.config["top_candidates"],
                                   largest=True,
                                   sorted=False)  # indices of the K highest returns
            best_actions = actions[topk]
            # Update belief with new means and standard deviations
            action_distribution = Normal(
                best_actions.mean(dim=0),
                best_actions.std(dim=0, unbiased=False))
        # Return the mean of the refined action distribution (the planned action sequence)
        return action_distribution.mean.tolist()
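
Example #3 is a cross-entropy-method style planner: it samples candidate action sequences from a Gaussian, rolls each one out on a copy of the environment, and re-fits the Gaussian to the K best sequences by discounted return. A hypothetical configuration for it could look like the following; the keys are the ones read by the code above, while the values are illustrative only.

config = {
    "horizon": 10,          # length of each planned action sequence
    "iterations": 5,        # number of refinement iterations
    "candidates": 100,      # J: action sequences sampled per iteration
    "top_candidates": 10,   # K: elite sequences kept for re-fitting
    "gamma": 0.99,          # discount factor applied to simulated rewards
}
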
Example #4
    def plan(self, state, observation):
        for self.episode in range(self.config['episodes']):
            if (self.episode + 1) % max(self.config['episodes'] // 10, 1) == 0:
                logger.debug('{} / {}'.format(self.episode + 1,
                                              self.config['episodes']))
            self.run(safe_deepcopy_env(state))

        return self.get_plan()
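
Examples #2 and #4 follow the same pattern: run a number of simulated episodes, each starting from a fresh copy of the current state, then return the resulting plan. A hypothetical driving loop could call such a planner like this; agent, env, and the choice to execute only the first planned action before replanning are assumptions made for illustration.

observation = env.reset()
done = False
while not done:
    actions = agent.plan(env, observation)                   # plan from the current (copyable) env
    observation, reward, done, info = env.step(actions[0])   # execute the first action, then replan
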
Example #5
    def expand(self, state, leaves, update_children=False):
        if state is None:
            raise Exception("The state should be set before expanding a node")
        try:
            actions = state.get_available_actions()
        except AttributeError:
            actions = range(state.action_space.n)
        for action in actions:
            self.children[action] = type(self)(self, self.planner)
            if update_children:
                _, reward, done, _ = safe_deepcopy_env(state).step(action)
                self.children[action].update(reward, done)

        # Replace this node by its children in the list of leaves
        idx = leaves.index(self)
        leaves = leaves[:idx] + list(self.children.values()) + leaves[idx + 1:]
        return leaves
Example #6
    def expand(self, leaves):
        if self.state is None:
            raise Exception("The state should be set before expanding a node")
        try:
            actions = self.state.get_available_actions()
        except AttributeError:
            actions = range(self.state.action_space.n)
        for action in actions:
            self.children[action] = type(self)(self,
                                               self.planner,
                                               state=safe_deepcopy_env(self.state),
                                               depth=self.depth + 1)
            _, reward, done, _ = self.children[action].state.step(action)
            self.children[action].update(reward, done)

        # Replace this node by its children in the list of leaves
        leaves.remove(self)
        leaves.extend(self.children.values())
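
Examples #5 and #6 keep an explicit list of leaf nodes: expanding a leaf replaces it in that list by its newly created children. A planner built on these methods could grow the tree with a loop along the following lines; planner, root and the expansion budget are hypothetical names used only for illustration.

leaves = [root]
for _ in range(planner.config["budget"]):    # hypothetical expansion budget
    node = leaves[0]                         # e.g. expand the oldest (shallowest) leaf first
    node.expand(leaves)                      # Example #6 signature: the leaves list is updated in place
    # With the Example #5 signature, reassign instead:
    # leaves = node.expand(state, leaves, update_children=True)
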