def mdp_rep_to_mrp_rep1(
    mdp_rep: SASf,
    policy_rep: SAf
) -> SSf:
    """Collapse a state -> action -> {key: value} mapping using a policy.

    For every state in ``mdp_rep`` and every ``(action, weight)`` pair in
    ``policy_rep[state]``, the inner dict ``mdp_rep[state][action]`` has each
    of its values multiplied by ``weight``. The scaled dicts for one state
    are then handed, as a list, to ``sum_dicts`` and the result is stored
    under that state.

    :param mdp_rep: nested mapping indexed as ``mdp_rep[state][action]``,
        whose leaves are dicts with numeric values.
    :param policy_rep: nested mapping indexed as ``policy_rep[state]``,
        yielding ``{action: weight}``; must contain every state of
        ``mdp_rep`` (a missing state raises ``KeyError``).
    :return: dict mapping each state of ``mdp_rep`` to whatever
        ``sum_dicts`` returns for that state's list of scaled dicts.
    """
    mrp_rep = {}
    for state, per_action in mdp_rep.items():
        scaled_dicts = []
        for action, weight in policy_rep[state].items():
            # Scale every entry of this action's dict by the policy weight.
            scaled_dicts.append({
                target: weight * value
                for target, value in per_action[action].items()
            })
        # NOTE(review): sum_dicts is defined elsewhere; presumably it adds
        # the dicts key-wise -- confirm against its definition.
        mrp_rep[state] = sum_dicts(scaled_dicts)
    return mrp_rep
def get_state_reward_gen_dict(
    tr: SASf,
    rr: SASf
) -> Mapping[S, Mapping[A, Callable[[], Tuple[S, float]]]]:
    """Build a state -> action -> generator-function lookup table.

    The keys are taken from ``rr``: for each state in ``rr`` and each action
    under that state, ``get_state_reward_gen_func`` is called with
    ``tr[state][action]`` and ``rr[state][action]`` and its result is stored
    at ``result[state][action]``.

    :param tr: nested mapping indexed as ``tr[state][action]``; must contain
        every (state, action) pair present in ``rr``.
    :param rr: nested mapping indexed as ``rr[state][action]``; drives the
        iteration.
    :return: nested dict with the same state/action keys as ``rr`` whose
        leaves are the objects returned by ``get_state_reward_gen_func``
        (annotated as zero-argument callables returning ``(state, float)``).
    """
    gen_dict = {}
    for state, per_action in rr.items():
        action_funcs = {}
        for action, _ in per_action.items():
            # NOTE(review): get_state_reward_gen_func is defined elsewhere;
            # its exact semantics are not visible from this block.
            action_funcs[action] = get_state_reward_gen_func(
                tr[state][action],
                rr[state][action]
            )
        gen_dict[state] = action_funcs
    return gen_dict
def flatten_sasf_dict(sasf: SASf) -> FlattenedDict:
    """Flatten a three-level nested mapping into a list of keyed entries.

    Walks ``sasf[s][a][s1]`` in iteration order and emits one
    ``((s, a, s1), value)`` tuple per leaf.

    :param sasf: mapping of mappings of mappings; the innermost values are
        the leaves.
    :return: list of ``((s, a, s1), value)`` tuples, ordered by the
        iteration order of the nested mappings (empty list for empty input).
    """
    flat = []
    for state, per_action in sasf.items():
        for action, per_next in per_action.items():
            for next_state, value in per_next.items():
                flat.append(((state, action, next_state), value))
    return flat