    @classmethod
    def from_values(cls, values: dict):
        # Greedy policy from an action-value table: always hit below 12
        # (no bust risk), otherwise take the action with the higher value.
        mapping = dict()
        for s in State.get_all_states():
            if s.current_sum < 12:
                mapping[s] = Action.HIT
            elif values[StateActionPair(s, Action.STICK)] > values[StateActionPair(s, Action.HIT)]:
                mapping[s] = Action.STICK
            else:
                mapping[s] = Action.HIT
        return Policy.from_deterministic_mapping(mapping)

    @classmethod
    def epsilon_greedy_from_values(cls, values: dict, exploring_prob: Callable):
        # Epsilon-greedy policy: the greedy action receives probability
        # 1 - exploring_prob(); the first entry corresponds to STICK,
        # the second to HIT.
        mapping = dict()
        for s in State.get_all_states():
            if values[StateActionPair(s, Action.STICK)] > values[StateActionPair(s, Action.HIT)]:
                mapping[s] = [1. - exploring_prob(), exploring_prob()]
            else:
                mapping[s] = [exploring_prob(), 1. - exploring_prob()]
        return Policy.from_probabilistic_mapping(mapping)
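
# Usage sketch (illustrative, not part of the module): assuming the two
# classmethods above are exposed on Policy and a trained Q-table `q_values`
# keyed by StateActionPair is available, policies could be built as
#
#     greedy_policy = Policy.from_values(q_values)
#     behaviour_policy = Policy.epsilon_greedy_from_values(q_values, lambda: 0.1)
#
# where `lambda: 0.1` stands in for any zero-argument exploration schedule,
# e.g. one that decays as training progresses.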
from itertools import product

from model.actions import Action
from model.policy import Policy
from model.state import State, StateActionPair

ALL_STATES = State.get_all_states()
ALL_STATE_ACTION_PAIRS = [
    StateActionPair(s, a) for s, a in product(ALL_STATES, list(Action))
]


# Base class for the tabular control algorithms below: it holds the shared
# Q-table, while concrete subclasses override `policy` and `train`.
class Algorithm:
    @classmethod
    def _create_sap_unif_mapping(cls, value):
        # Map every (state, action) pair to the same initial value.
        return {sap: value for sap in ALL_STATE_ACTION_PAIRS}

    @property
    def policy(self) -> Policy:
        raise NotImplementedError

    def __init__(self):
        # Action-value estimates Q(s, a), initialised to zero for every pair.
        self._Q = Algorithm._create_sap_unif_mapping(0.)

    def train(self, rounds: int) -> None:
        raise NotImplementedError
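
# Usage sketch (illustrative): with a concrete subclass such as
# MonteCarloAlgorithm below, training and reading off the learned policy
# would look roughly like
#
#     algo = MonteCarloAlgorithm()
#     algo.train(rounds=500_000)   # round count is an arbitrary example value
#     learned_policy = algo.policy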


class MonteCarloAlgorithm(Algorithm):
    def __init__(self):
        super().__init__()