def sp_func(
    s: S,
    qf=qf,
    state_action_func=state_action_func,
    softmax=softmax,
    epsilon=epsilon,
) -> Mapping[A, float]:
    """Return the per-action mapping produced for state ``s``.

    The defaults bind the surrounding ``qf``, ``state_action_func``,
    ``softmax`` and ``epsilon`` values at definition time, so later
    rebinding of those outer names does not affect this function.

    Args:
        s: The state to evaluate.
        qf: Callable invoked with an ``(s, a)`` tuple for each action.
        state_action_func: Callable yielding the actions to consider for ``s``.
        softmax: When truthy, delegate to ``get_softmax_action_probs``;
            otherwise delegate to ``get_epsilon_action_probs``.
        epsilon: Forwarded to ``get_epsilon_action_probs`` (unused on the
            softmax path).

    Returns:
        Whatever the selected helper returns for the action-value mapping.
    """
    # Evaluate qf once per available action, keyed by the action itself.
    action_values = {}
    for action in state_action_func(s):
        action_values[action] = qf((s, action))

    if softmax:
        return get_softmax_action_probs(action_values)
    return get_epsilon_action_probs(action_values, epsilon)
def edit_state_action_to_epsilon_greedy(
    self,
    state: S,
    action_value_dict: Mapping[A, float],
    epsilon: float,
) -> None:
    """Overwrite the policy entry for ``state`` in ``self.policy_data``.

    Args:
        state: Key under which the new entry is stored.
        action_value_dict: Mapping from action to value, passed to
            ``get_epsilon_action_probs``.
        epsilon: Passed through to ``get_epsilon_action_probs``.

    Returns:
        None. ``self.policy_data`` is mutated in place.
    """
    # Compute first, then store: any previous entry for `state` is replaced.
    action_probs = get_epsilon_action_probs(action_value_dict, epsilon)
    self.policy_data[state] = action_probs
def get_soft_policy_from_qf_dict(
    qf_dict: SAf,
    softmax: bool,
    epsilon: float,
) -> Policy:
    """Build a ``Policy`` from a per-state action-value mapping.

    Args:
        qf_dict: Mapping whose ``items()`` yield ``(state, action_values)``
            pairs.
        softmax: When truthy, each state's action values go through
            ``get_softmax_action_probs``; otherwise through
            ``get_epsilon_action_probs``.
        epsilon: Passed to ``get_epsilon_action_probs`` (unused on the
            softmax path).

    Returns:
        A ``Policy`` constructed from the resulting per-state dictionary.
    """
    if softmax:
        softmax_data = {
            state: get_softmax_action_probs(action_values)
            for state, action_values in qf_dict.items()
        }
        return Policy(softmax_data)

    epsilon_data = {
        state: get_epsilon_action_probs(action_values, epsilon)
        for state, action_values in qf_dict.items()
    }
    return Policy(epsilon_data)
def get_epsilon_policy_from_qf(
    qf_dict: Mapping[S, Mapping[A, float]],
    epsilon: float,
) -> Policy:
    """Build a ``Policy`` by applying ``get_epsilon_action_probs`` per state.

    Args:
        qf_dict: Mapping from state to that state's action-value mapping.
        epsilon: Passed to ``get_epsilon_action_probs`` for every state.

    Returns:
        A ``Policy`` constructed from the per-state results.
    """
    policy_data = {}
    for state, action_values in qf_dict.items():
        policy_data[state] = get_epsilon_action_probs(action_values, epsilon)
    return Policy(policy_data)
def sp_func(s: S, softmax=softmax, epsilon=epsilon) -> Mapping[A, float]:
    """Return the per-action mapping produced for state ``s``.

    The defaults bind the surrounding ``softmax`` and ``epsilon`` values at
    definition time.

    Args:
        s: The state, passed to ``get_act_value_dict_from_state``.
        softmax: When truthy, delegate to ``get_softmax_action_probs``;
            otherwise delegate to ``get_epsilon_action_probs``.
        epsilon: Forwarded to ``get_epsilon_action_probs`` (unused on the
            softmax path).

    Returns:
        Whatever the selected helper returns for the state's action values.
    """
    action_values = get_act_value_dict_from_state(s)
    if softmax:
        return get_softmax_action_probs(action_values)
    return get_epsilon_action_probs(action_values, epsilon)