Example #1
0
 def split_info(info: SASTff) -> Tuple[SASf, SASf, SAf]:
     """Split a doubly nested mapping of tuples into three mappings.

     ``info`` is walked as ``info[s][a][s1] -> leaf``.  The leaves are
     separated with ``zip_dict_of_tuple`` at the innermost level first,
     then once per outer key, then across the whole mapping; that last
     call must yield exactly two components.

     NOTE(review): presumably every leaf is a (probability, reward) pair
     and ``zip_dict_of_tuple`` turns a dict of tuples into a tuple of
     dicts -- confirm against the helper and the ``SASTff`` alias.

     Returns:
         A 3-tuple: the two components produced by the final
         ``zip_dict_of_tuple`` call, followed by a mapping
         ``s -> a -> total`` where ``total`` is the sum of
         ``np.prod(leaf)`` over all leaves stored under ``info[s][a]``.
     """
     # Phase 1: separate the tuples stored under every (s, a) pair.
     leaf_split = {}
     for state, by_action in info.items():
         leaf_split[state] = {
             action: zip_dict_of_tuple(successors)
             for action, successors in by_action.items()
         }

     # Phase 2: separate once per state, then across all states.
     state_split = {}
     for state, action_parts in leaf_split.items():
         state_split[state] = zip_dict_of_tuple(action_parts)
     first_part, second_part = zip_dict_of_tuple(state_split)

     # Phase 3: collapse each (s, a) entry of the raw input to one number.
     totals = {}
     for state, by_action in info.items():
         action_totals = {}
         for action, successors in by_action.items():
             action_totals[action] = sum(
                 np.prod(leaf) for leaf in successors.values()
             )
         totals[state] = action_totals

     return first_part, second_part, totals
Example #2
0
 def split_info(info: Type1) -> Tuple[Type2, Type2,
                                      Mapping[S, Mapping[A, float]]]:
     """Separate the tuple leaves of ``info`` and total them per (s, a).

     ``info`` is treated as a three-level mapping
     ``info[s][a][s1] -> leaf``.  ``zip_dict_of_tuple`` is applied to the
     innermost dicts, then to each per-state dict of results, and finally
     to the whole thing; the last call is unpacked into two values.

     NOTE(review): presumably each leaf is a (probability, reward) pair,
     which would make the third result an expected reward -- confirm
     against ``Type1`` and ``zip_dict_of_tuple``.

     Returns:
         ``(left, right, summed)`` where ``left`` and ``right`` come from
         the final ``zip_dict_of_tuple`` call and ``summed[s][a]`` is the
         sum of ``np.prod(leaf)`` over the values of ``info[s][a]``.
     """
     def unzip_each(table):
         # Apply the helper to every value, keeping the keys.
         return {key: zip_dict_of_tuple(entry)
                 for key, entry in table.items()}

     def leaf_product_total(leaves):
         # Sum of the element-wise product of every stored leaf.
         return sum(np.prod(leaf) for leaf in leaves.values())

     innermost = {state: unzip_each(by_action)
                  for state, by_action in info.items()}
     per_state = unzip_each(innermost)
     left, right = zip_dict_of_tuple(per_state)

     summed = {
         state: {action: leaf_product_total(leaves)
                 for action, leaves in by_action.items()}
         for state, by_action in info.items()
     }
     return left, right, summed
Example #3
0
 def __init__(self, info: Mapping[S, Mapping[A, Tuple[Mapping[S, float],
                                                      float]]],
              gamma: float) -> None:
     """Populate the instance from ``info`` after validating it.

     Args:
         info: Nested mapping ``state -> action -> (mapping, float)``,
             as given by the annotation.  NOTE(review): presumably the
             inner mapping holds next-state probabilities and the float
             is the reward -- confirm with the callers.
         gamma: Stored unchanged as ``self.gamma``.

     Raises:
         ValueError: If ``verify_mdp(info)`` returns a falsy value.
     """
     if not verify_mdp(info):
         raise ValueError

     # Separate the (mapping, float) tuples: first per state, then
     # across all states, which must give exactly two components.
     per_state = {}
     for state, by_action in info.items():
         per_state[state] = zip_dict_of_tuple(by_action)
     raw_transitions, reward_table = zip_dict_of_tuple(per_state)

     self.all_states: Set[S] = get_all_states(info)
     self.state_action_dict: Mapping[S, Set[A]] = (
         get_actions_for_states(info)
     )

     # Every per-action mapping is passed through get_lean_transitions
     # before being stored.
     lean_transitions = {}
     for state, by_action in raw_transitions.items():
         lean_transitions[state] = {
             action: get_lean_transitions(successors)
             for action, successors in by_action.items()
         }
     self.transitions: Mapping[S, Mapping[A, Mapping[S, float]]] = (
         lean_transitions
     )

     self.rewards: Mapping[S, Mapping[A, float]] = reward_table
     self.gamma: float = gamma
     # Computed last, after the attributes above have been assigned.
     self.terminal_states: Set[S] = self.get_terminal_states()
Example #4
0
 def __init__(self, info: STSff, gamma: float) -> None:
     """Initialise the instance from ``info`` and ``gamma``.

     ``info`` is split by ``zip_dict_of_tuple`` into two parts: the first
     is forwarded to the parent constructor and the second is stored as
     ``self.rewards``.  ``gamma`` is stored unchanged as ``self.gamma``.

     NOTE(review): presumably ``info`` maps each state to a
     (transitions, reward) tuple and ``gamma`` is a discount factor --
     confirm against the ``STSff`` alias and the callers.
     """
     # The helper's result must unpack into exactly two components.
     d1, d2 = zip_dict_of_tuple(info)
     # Only the first component goes to the parent constructor.
     super().__init__(d1)
     self.gamma: float = gamma
     self.rewards: Mapping[S, float] = d2
     # Each attribute below is produced by a method on ``self``.
     # NOTE(review): these methods likely read the attributes assigned
     # above (and each other's results), so keep this order -- confirm
     # in the method bodies.
     self.terminal_states = self.get_terminal_states()
     self.nt_states_list: Sequence[S] = self.get_nt_states_list()
     self.trans_matrix: np.ndarray = self.get_trans_matrix()
     self.rewards_vec: np.ndarray = self.get_rewards_vec()
Example #5
0
 def split_info(info: SSTff) -> Tuple[SSf, SSf, Mapping[S, float]]:
     """Split a nested mapping of tuples into three mappings.

     ``info`` is walked as ``info[s][s1] -> leaf``.  Each inner dict is
     passed to ``zip_dict_of_tuple``, and the dict of those results is
     passed to it once more; that final call is unpacked into two values.

     NOTE(review): presumably every leaf is a (probability, reward) pair
     and ``zip_dict_of_tuple`` turns a dict of tuples into a tuple of
     dicts -- confirm against the helper and the ``SSTff`` alias.

     Returns:
         A 3-tuple: the two components of the final ``zip_dict_of_tuple``
         call, followed by a mapping ``s -> total`` where ``total`` is
         the sum of ``np.prod(leaf)`` over the values of ``info[s]``.
     """
     # Separate the tuples per state, then across all states.
     unzipped = {}
     for state, successors in info.items():
         unzipped[state] = zip_dict_of_tuple(successors)
     first_part, second_part = zip_dict_of_tuple(unzipped)

     # Collapse each state's raw leaves to a single number.
     totals = {}
     for state, successors in info.items():
         totals[state] = sum(np.prod(leaf) for leaf in successors.values())

     return first_part, second_part, totals