def sample(self, batch_size):
    # Draw a random batch of stored transitions and regroup the per-transition
    # tuples into per-field tuples via zip(*...).
    states, contexts, actions, rewards, next_states, next_contexts, dones = \
        zip(*array_random_choice(self.buffer, count=batch_size, random=self.random))
    n_parts: int = len(states[0])
    return (self._concatenate_states_(states), contexts, actions, rewards,
            self._concatenate_states_(next_states), next_contexts, dones)
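# The helper below is a minimal sketch of what `_concatenate_states_` could look
# like, assuming each stored state is a tuple of `n_parts` numpy arrays (as the
# `n_parts = len(states[0])` line above suggests); it is an illustration under
# that assumption, not the repository's actual implementation.
import numpy as np
from typing import List, Sequence, Tuple

def concatenate_states(states: Sequence[Tuple[np.ndarray, ...]]) -> List[np.ndarray]:
    """Stack each part of the multi-part states along a new batch dimension."""
    n_parts = len(states[0])
    return [np.stack([state[i] for state in states], axis=0) for i in range(n_parts)]

# Example: for a batch of 3 states, each made of a 4-dim part and a 2-dim part,
# concatenate_states(batch) returns [array of shape (3, 4), array of shape (3, 2)].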
def select(self, state: State, possibilities: List[Option[OptionData]],
           prev_option: Optional[Option[OptionData]],
           parent_option: Option[OptionData]) -> Option[OptionData]:
    """
    Conditioned on the current state and option, chooses the next suboption
    to pursue as a direct child of 'parent_option' from the list of possibilities.

    Parameters
    ----------
    state: State
        the state the agent is in when the chosen option will begin to be pursued
    possibilities: List[Option[OptionData]]
        the possible suboptions to pursue. Length must be at least 1
    prev_option: Optional[Option[OptionData]]
        the last option that was pursued. Will be None at the start of a new episode
    parent_option: Option[OptionData]
        the parent option that the chosen option will be a direct child of

    Returns
    -------
    chosen: Option
        the option that was chosen to pursue next
    """
    probabilities: np.ndarray = self._selection_probabilities_(
        state, possibilities, prev_option, parent_option,
        normalize=False)  # np.ndarray[float]: [len(possibilities), ]
    return array_random_choice(possibilities, probabilities, self.random)
def _random_tile_of_type(
        self, required_type: str = None,
        rand_seed: Union[int, RandomState] = None) -> Point:
    # Pick a uniformly random tile among all tiles of the required type.
    random: RandomState = optional_random(rand_seed)
    possibilities: List[Point] = self._all_tiles_of_type(required_type)
    return array_random_choice(possibilities, random=random)
def sample(
        self, num_samples: int = 1
        ) -> List[TrainSample[State, Action, Reward, OptionData]]:
    """
    Parameters
    ----------
    num_samples: int = 1

    Returns
    -------
    samples: List[TrainSample] : [num_samples, ]
    """
    result: List[TrainSample[State, Action, Reward, OptionData]] = []
    for _ in range(num_samples):
        # Pick a random episode tree, then a random parent node within it,
        # then a random child node within that parent's subtree.
        episode_root_node: Node[Option[OptionData]] = array_random_choice(
            self.list_roots, random=self.random)
        parent_option_node: Node[Option[OptionData]] = array_random_choice(
            Tree.list_nodes(episode_root_node), random=self.random)
        child_option_node: Node[Option[OptionData]] = array_random_choice(
            Tree.list_nodes(parent_option_node), random=self.random)
        prev_option_node: Optional[Node[Option[OptionData]]] = \
            Tree.get_next_left_parent(parent_option_node)
        prev_option: Optional[Option[OptionData]] = \
            prev_option_node.value if prev_option_node is not None else None
        suboption_trajectory: Trajectory[State, Action, Reward] = \
            self.trajectory_for(child_option_node)
        option_trajectory: Trajectory[State, Action, Reward] = \
            self.trajectory_for(parent_option_node)
        initial_state: State = self.initial_state_for(child_option_node)
        midpoint_state: State = self.terminal_state_for(child_option_node)
        terminal_state: State = self.terminal_state_for(parent_option_node)
        result.append(
            TrainSample(prev_option, initial_state, suboption_trajectory,
                        child_option_node.value, midpoint_state,
                        option_trajectory, parent_option_node.value,
                        terminal_state))
    return result
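# A hedged sketch of the `TrainSample` container implied by the positional
# constructor call above. Field names are inferred from the local variable names
# in `sample` and are assumptions, not the repository's actual definition.
from dataclasses import dataclass
from typing import Generic, Optional, TypeVar

State = TypeVar("State")
Action = TypeVar("Action")
Reward = TypeVar("Reward")
OptionData = TypeVar("OptionData")

@dataclass
class TrainSampleSketch(Generic[State, Action, Reward, OptionData]):
    prev_option: Optional["Option[OptionData]"]          # option pursued before the parent began
    initial_state: State                                  # state where the child option started
    suboption_trajectory: "Trajectory[State, Action, Reward]"
    suboption: "Option[OptionData]"                       # child_option_node.value
    midpoint_state: State                                  # terminal state of the child option
    option_trajectory: "Trajectory[State, Action, Reward]"
    option: "Option[OptionData]"                           # parent_option_node.value
    terminal_state: State                                   # terminal state of the parent option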
def get_random_action(self):
    # Pick a uniformly random action from the environment's action set.
    # The generator is passed by keyword, matching the other call sites, so it
    # is not mistaken for the probabilities argument.
    return array_random_choice(self.env.actions, random=self.random)
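# `array_random_choice` is used throughout the snippets above but not shown.
# The sketch below is an assumed implementation consistent with the observed
# call patterns (optional `probabilities`, optional `random` seed or generator,
# optional `count` for drawing several items); it is not the repository's code.
from typing import Any, List, Optional, Sequence, Union
import numpy as np
from numpy.random import RandomState

def array_random_choice_sketch(array: Sequence[Any],
                               probabilities: Optional[Sequence[float]] = None,
                               random: Optional[Union[int, RandomState]] = None,
                               count: Optional[int] = None):
    """Choose one element (or `count` elements) from `array`, optionally weighted."""
    rng = random if isinstance(random, RandomState) else RandomState(random)
    if probabilities is not None:
        weights = np.asarray(probabilities, dtype=float)
        probabilities = weights / weights.sum()  # normalize to a distribution
    indices = rng.choice(len(array), size=count, p=probabilities)
    if count is None:
        return array[int(indices)]
    return [array[int(i)] for i in indices]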