Beispiel #1
0
 def mrp_func1(s: S,
               mdp_rep=mdp_rep,
               policy_func=policy_func) -> Mapping[S, float]:
     """Merge the per-action mappings for state ``s``, weighted by the policy.

     ``policy_func(s)`` yields a mapping from action to weight and
     ``mdp_rep(s, action)`` yields a mapping from key to value.  Each value
     is multiplied by the weight of its action, and the resulting
     dictionaries are handed to ``sum_dicts`` as a list.

     NOTE(review): ``sum_dicts`` is defined outside this view; presumably it
     adds values key-wise across the dictionaries -- confirm.
     """
     weighted_terms = []
     for action, weight in policy_func(s).items():
         outcomes = mdp_rep(s, action)
         weighted_terms.append(
             {nxt: weight * value for nxt, value in outcomes.items()}
         )
     return sum_dicts(weighted_terms)
Beispiel #2
0
def mdp_rep_to_mrp_rep1(
    mdp_rep: SASf,
    policy_rep: SAf
) -> SSf:
    """Collapse a state -> action -> {key: value} mapping using a policy.

    For every state in ``mdp_rep``, the actions listed in
    ``policy_rep[state]`` are walked; the inner mapping stored under each
    action is scaled by that action's policy weight, and the scaled
    dictionaries are combined with ``sum_dicts``.

    A state missing from ``policy_rep``, or a policy action missing from
    ``mdp_rep[state]``, raises ``KeyError``.

    NOTE(review): ``sum_dicts`` is defined outside this view; presumably it
    adds values key-wise across the dictionaries -- confirm.
    """
    mrp_rep = {}
    for state, per_action in mdp_rep.items():
        scaled = []
        for action, weight in policy_rep[state].items():
            targets = per_action[action]
            scaled.append(
                {nxt: weight * value for nxt, value in targets.items()}
            )
        mrp_rep[state] = sum_dicts(scaled)
    return mrp_rep
Beispiel #3
0
def mdp_rep_to_mrp_rep1(mdp_rep: SASf, policy_rep: SAf) -> SSf:
    """Collapse a state -> action -> {key: value} mapping using a policy.

    For every state and every action present in ``mdp_rep``, the inner
    mapping is scaled by ``policy_rep[state].get(action, 0)`` -- so actions
    absent from the policy contribute zero-valued entries -- and the scaled
    dictionaries for a state are combined with ``sum_dicts``.

    NOTE(review): ``sum_dicts`` is defined outside this view; presumably it
    adds values key-wise across the dictionaries -- confirm.
    """
    mrp_rep = {}
    for state, per_action in mdp_rep.items():
        scaled = []
        for action, targets in per_action.items():
            # The policy lookup is kept inside the per-entry expression so
            # that no lookup happens for an action with an empty mapping.
            scaled.append({
                nxt: policy_rep[state].get(action, 0) * value
                for nxt, value in targets.items()
            })
        mrp_rep[state] = sum_dicts(scaled)
    return mrp_rep