Example 1
def sample(self, batch_size):
    # Randomly draw `batch_size` transitions from the replay buffer and
    # unzip them into per-field tuples.
    states, contexts, actions, rewards, next_states, next_contexts, dones = \
        zip(*array_random_choice(
            self.buffer, count=batch_size, random=self.random))
    return (self._concatenate_states_(states), contexts, actions, rewards,
            self._concatenate_states_(next_states), next_contexts, dones)
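The helper `array_random_choice` itself does not appear in any of these examples. Going by the call sites on this page (a sequence first, optional positional weights as in Example 2, and `random`/`count` keywords as in Examples 1, 3 and 4), a minimal sketch could look like the following; the exact parameter order, the internal normalization of the weights, and the fallback to NumPy's global generator are assumptions rather than the real implementation.

import numpy as np
from numpy.random import RandomState
from typing import List, Optional, Sequence, TypeVar, Union

T = TypeVar('T')

def array_random_choice(array: Sequence[T],
                        probabilities: Optional[Sequence[float]] = None,
                        random: Optional[RandomState] = None,
                        count: Optional[int] = None) -> Union[T, List[T]]:
    # Fall back to NumPy's global generator when no RandomState is supplied.
    rng = random if random is not None else np.random
    if probabilities is not None:
        # Accept unnormalized weights (Example 2 passes normalize=False).
        p = np.asarray(probabilities, dtype=float)
        p = p / p.sum()
    else:
        p = None
    # Draw indices so that plain Python sequences work, not just ndarrays.
    indices = rng.choice(len(array), size=count, p=p)
    if count is None:
        return array[indices]            # a single element
    return [array[i] for i in indices]   # a list of `count` elements

Threading a single seeded RandomState through `random=` (as `self.random` in the examples) keeps every draw reproducible from one seed.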
Example 2
def select(self, state: State, possibilities: List[Option[OptionData]],
           prev_option: Optional[Option[OptionData]],
           parent_option: Option[OptionData]) -> Option[OptionData]:
    """
        Conditioned on the current state and parent option, chooses the
        next suboption to pursue as a direct child of 'parent_option' from
        the list of possibilities.
        Parameters
        ----------
        state: State
            the state the agent is in when the chosen option will begin
            to be pursued
        possibilities: List[Option[OptionData]]
            the possible suboptions to pursue. Length must be at least 1
        prev_option: Optional[Option[OptionData]]
            the last option that was pursued. Will be None at the start
            of a new episode
        parent_option: Option[OptionData]
            the parent option that the chosen option will be a direct
            child of
        Returns
        -------
        chosen: Option[OptionData]
            the option chosen to pursue next
    """
    probabilities: np.ndarray = self._selection_probabilities_(
        state, possibilities, prev_option, parent_option, normalize=False)
    # np.ndarray[float]: [len(possibilities), ]
    return array_random_choice(possibilities, probabilities, self.random)
Example 3
def _random_tile_of_type(
        self,
        required_type: Optional[str] = None,
        rand_seed: Optional[Union[int, RandomState]] = None) -> Point:
    # Resolve `rand_seed` (an int seed, an existing RandomState, or None)
    # into a RandomState, then pick a random tile of the required type.
    random: RandomState = optional_random(rand_seed)
    possibilities: List[Point] = self._all_tiles_of_type(required_type)
    return array_random_choice(possibilities, random=random)
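`optional_random` is likewise not shown on this page. Given the `rand_seed` parameter above (an int seed, an existing RandomState, or None), it presumably wraps whatever it receives into a usable RandomState; a plausible sketch, not the actual helper:

from numpy.random import RandomState
from typing import Optional, Union

def optional_random(rand_seed: Optional[Union[int, RandomState]] = None) -> RandomState:
    # Pass an existing RandomState through unchanged; otherwise seed a new
    # one from the int (or leave it unseeded when rand_seed is None).
    if isinstance(rand_seed, RandomState):
        return rand_seed
    return RandomState(rand_seed)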
Example 4
def sample(
    self,
    num_samples: int = 1
) -> List[TrainSample[State, Action, Reward, OptionData]]:
    """
        Parameters
        ----------
        num_samples: int = 1
            the number of training samples to draw
        Returns
        -------
        samples: List[TrainSample] : [num_samples, ]
            one TrainSample per draw
    """
    result: List[TrainSample[State, Action, Reward, OptionData]] = []
    for _ in range(num_samples):
        # Pick a random episode root, then a random node of that episode's
        # option tree as the parent, and a random node of the parent's
        # subtree as the child.
        episode_root_node: Node[Option[OptionData]] = array_random_choice(
            self.list_roots, random=self.random)
        parent_option_node: Node[Option[OptionData]] = array_random_choice(
            Tree.list_nodes(episode_root_node), random=self.random)
        child_option_node: Node[Option[OptionData]] = array_random_choice(
            Tree.list_nodes(parent_option_node), random=self.random)
        # The previously pursued option, or None at the start of an episode.
        prev_option_node: Optional[Node[Option[OptionData]]] = \
            Tree.get_next_left_parent(parent_option_node)
        prev_option: Optional[Option[OptionData]] = \
            prev_option_node.value if prev_option_node is not None else None

        suboption_trajectory: Trajectory[State, Action, Reward] = \
            self.trajectory_for(child_option_node)
        option_trajectory: Trajectory[State, Action, Reward] = \
            self.trajectory_for(parent_option_node)

        initial_state: State = self.initial_state_for(child_option_node)
        midpoint_state: State = self.terminal_state_for(child_option_node)
        terminal_state: State = self.terminal_state_for(parent_option_node)

        result.append(
            TrainSample(prev_option, initial_state, suboption_trajectory,
                        child_option_node.value, midpoint_state,
                        option_trajectory, parent_option_node.value,
                        terminal_state))
    return result
Example 5
def get_random_action(self):
    # Pick a random action from the environment; pass the RandomState by
    # keyword, matching the other call sites, so it is not taken as the
    # optional weights argument.
    return array_random_choice(self.env.actions, random=self.random)