def split_info(info: SASTff) -> Tuple[SASf, SASf, SAf]:
    """Split a state -> action -> key -> tuple mapping into three views.

    The first two returned mappings come from applying
    ``zip_dict_of_tuple`` at the innermost level, then per state, then
    across all states.  (Presumably that helper turns a dict of 2-tuples
    into a pair of dicts -- confirm against its definition.)

    The third mapping holds, for every state and action, the sum over the
    innermost entries of ``np.prod`` of each tuple.  If the tuples are
    (probability, reward) pairs this is the expected reward -- TODO confirm
    with the callers.

    Returns:
        ``(first_parts, second_parts, summed_products)``.
    """
    # Stage 1: split every innermost dict, keeping the state/action nesting.
    split_per_action = {}
    for state, by_action in info.items():
        inner = {}
        for action, successors in by_action.items():
            inner[action] = zip_dict_of_tuple(successors)
        split_per_action[state] = inner

    # Stage 2: lift the split one level up, to per-state granularity.
    split_per_state = {}
    for state, per_action in split_per_action.items():
        split_per_state[state] = zip_dict_of_tuple(per_action)

    # Stage 3: lift the split to the top level.
    first_parts, second_parts = zip_dict_of_tuple(split_per_state)

    # Independent pass over the raw input: sum of per-entry tuple products.
    summed_products = {}
    for state, by_action in info.items():
        per_action_totals = {}
        for action, successors in by_action.items():
            per_action_totals[action] = sum(
                np.prod(pair) for pair in successors.values()
            )
        summed_products[state] = per_action_totals

    return first_parts, second_parts, summed_products
def split_info(info: Type1) -> Tuple[Type2, Type2, Mapping[S, Mapping[A, float]]]:
    """Break a nested state/action mapping of tuples into three mappings.

    ``zip_dict_of_tuple`` is applied bottom-up (innermost dicts, then each
    state's action dict, then the whole thing) to obtain the first two
    results.  NOTE(review): the helper is defined elsewhere; it is assumed
    to return a pair of dicts built from a dict of 2-tuples.

    The third result maps state -> action -> sum of ``np.prod(t)`` over
    every tuple ``t`` stored under that state and action.

    Returns:
        A 3-tuple ``(first, second, totals)``.
    """

    def _split_actions(by_action):
        # Split the innermost dict of every action under one state.
        return {
            action: zip_dict_of_tuple(entries)
            for action, entries in by_action.items()
        }

    # All innermost splits are done before any per-state split.
    innermost_split = {
        state: _split_actions(by_action) for state, by_action in info.items()
    }
    state_split = {
        state: zip_dict_of_tuple(action_pairs)
        for state, action_pairs in innermost_split.items()
    }
    first, second = zip_dict_of_tuple(state_split)

    totals = {}
    for state, by_action in info.items():
        totals[state] = {
            action: sum(np.prod(t) for t in entries.values())
            for action, entries in by_action.items()
        }

    return first, second, totals
def __init__(
    self,
    info: Mapping[S, Mapping[A, Tuple[Mapping[S, float], float]]],
    gamma: float
) -> None:
    """Build the instance from a state -> action -> (transitions, reward) map.

    Args:
        info: For every state and action, a pair whose first element is a
            mapping of next state to float and whose second element is a
            float (per the annotation: transition weights and a reward).
        gamma: Stored unchanged as ``self.gamma`` (presumably the discount
            factor -- no range check is applied here).

    Raises:
        ValueError: If ``verify_mdp(info)`` returns a falsy value.
    """
    # Guard clause: reject invalid input up front with a message that says
    # which check failed, instead of a bare ValueError.
    if not verify_mdp(info):
        raise ValueError(
            "info failed verify_mdp validation; cannot construct instance"
        )

    # Separate each (transitions, reward) pair: first per state, then
    # across states, giving two state -> action -> ... mappings.
    split_per_state = {
        state: zip_dict_of_tuple(by_action)
        for state, by_action in info.items()
    }
    raw_transitions, rewards = zip_dict_of_tuple(split_per_state)

    self.all_states: Set[S] = get_all_states(info)
    self.state_action_dict: Mapping[S, Set[A]] = get_actions_for_states(info)
    # get_lean_transitions is defined elsewhere; presumably it drops
    # negligible-probability entries -- confirm against the helper.
    self.transitions: Mapping[S, Mapping[A, Mapping[S, float]]] = {
        state: {
            action: get_lean_transitions(next_states)
            for action, next_states in by_action.items()
        }
        for state, by_action in raw_transitions.items()
    }
    self.rewards: Mapping[S, Mapping[A, float]] = rewards
    self.gamma: float = gamma
    # Kept last, as in the original: the method may read attributes set above.
    self.terminal_states: Set[S] = self.get_terminal_states()
def __init__(self, info: STSff, gamma: float):
    """Initialise from a combined mapping plus ``gamma``.

    ``info`` is passed through ``zip_dict_of_tuple``; the first part goes
    to the parent initialiser and the second part is stored as
    ``self.rewards``.  (Going by the annotations, ``info`` maps each state
    to a (transitions, reward) pair -- TODO confirm against the ``STSff``
    alias.)

    The remaining attributes are derived by calling the instance's own
    getter methods.  The call order below is the original one and must be
    kept, since later getters may rely on attributes set earlier.
    """
    transition_part, reward_part = zip_dict_of_tuple(info)

    # Parent set-up happens before any attribute on this class is assigned.
    super().__init__(transition_part)

    self.gamma: float = gamma
    self.rewards: Mapping[S, float] = reward_part

    # Derived attributes, computed in a fixed order.
    self.terminal_states = self.get_terminal_states()
    self.nt_states_list: Sequence[S] = self.get_nt_states_list()
    self.trans_matrix: np.ndarray = self.get_trans_matrix()
    self.rewards_vec: np.ndarray = self.get_rewards_vec()
def split_info(info: SSTff) -> Tuple[SSf, SSf, Mapping[S, float]]:
    """Split a state -> key -> tuple mapping into three mappings.

    ``zip_dict_of_tuple`` is applied to each state's inner dict and then to
    the collected results, giving the first two return values.
    (Presumably the helper converts a dict of 2-tuples into a pair of
    dicts -- confirm against its definition.)

    The third return value maps each state to the sum of ``np.prod`` over
    the tuples stored under it.

    Returns:
        ``(first_parts, second_parts, summed_products)``.
    """
    # Pass 1: split each state's inner dict.
    split_by_state = {}
    for state, successors in info.items():
        split_by_state[state] = zip_dict_of_tuple(successors)

    # Pass 2: split across states.
    first_parts, second_parts = zip_dict_of_tuple(split_by_state)

    # Pass 3: per-state sum of tuple products, taken from the raw input.
    summed_products = {}
    for state, successors in info.items():
        summed_products[state] = sum(
            np.prod(pair) for pair in successors.values()
        )

    return first_parts, second_parts, summed_products