Beispiel #1
0
    def __init__(self, mdp_rep_for_rl: MDPRepForRLTabular, softmax: bool,
                 epsilon: float, epsilon_half_life: float, num_episodes: int,
                 max_steps: int) -> None:
        """Record the MDP representation and run settings on the instance.

        Args:
            mdp_rep_for_rl: Tabular MDP representation, kept as ``mdp_rep``.
            softmax: Flag stored unchanged (presumably chooses softmax over
                epsilon-greedy action selection -- confirm where it is read).
            epsilon: First argument passed to ``get_epsilon_decay_func``.
            epsilon_half_life: Second argument passed to
                ``get_epsilon_decay_func``.
            num_episodes: Stored unchanged as ``num_episodes``.
            max_steps: Stored unchanged as ``max_steps``.
        """
        self.mdp_rep: MDPRepForRLTabular = mdp_rep_for_rl
        self.softmax: bool = softmax

        # Only the derived callable is kept; the raw epsilon inputs are not
        # stored on the instance.
        decay_schedule = get_epsilon_decay_func(
            epsilon,
            epsilon_half_life,
        )
        self.epsilon_func: Callable[[int], float] = decay_schedule

        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps
Beispiel #2
0
 def __init__(self, mdp_rep_for_adp: MDPRepForADP, num_samples: int,
              softmax: bool, epsilon: float, epsilon_half_life: float,
              tol: float, fa_spec: FuncApproxSpec) -> None:
     """Record the ADP configuration and build the value-function approximator.

     Args:
         mdp_rep_for_adp: MDP representation, kept as ``mdp_rep``; its
             ``state_action_func`` attribute is also exposed directly on
             this instance.
         num_samples: Stored unchanged as ``num_samples``.
         softmax: Flag stored unchanged (presumably selects softmax-based
             exploration -- confirm where it is read).
         epsilon: First argument passed to ``get_epsilon_decay_func``.
         epsilon_half_life: Second argument passed to
             ``get_epsilon_decay_func``.
         tol: Stored unchanged as ``tol``.
         fa_spec: Specification whose ``get_vf_func_approx_obj()`` result
             is kept as ``fa``.
     """
     self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
     self.num_samples: int = num_samples
     self.softmax: bool = softmax

     # Only the derived callable is kept; the raw epsilon inputs are not
     # stored on the instance.
     decay_schedule = get_epsilon_decay_func(
         epsilon,
         epsilon_half_life,
     )
     self.epsilon_func: Callable[[int], float] = decay_schedule

     self.tol: float = tol
     self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()

     # Same object that ``self.mdp_rep`` refers to, so this is the same
     # attribute lookup as going through the instance.
     self.state_action_func: Callable[[S], Set[A]] = (
         mdp_rep_for_adp.state_action_func
     )
    def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, softmax: bool,
                 epsilon: float, epsilon_half_life: float, num_episodes: int,
                 max_steps: int, fa_spec: FuncApproxSpec) -> None:
        """Record the run settings and build both function approximators.

        Args:
            mdp_rep_for_rl: MDP representation, kept as ``mdp_rep``; its
                ``state_action_func`` attribute is also exposed directly on
                this instance.
            softmax: Flag stored unchanged (presumably selects softmax-based
                exploration -- confirm where it is read).
            epsilon: First argument passed to ``get_epsilon_decay_func``.
            epsilon_half_life: Second argument passed to
                ``get_epsilon_decay_func``.
            num_episodes: Stored unchanged as ``num_episodes``.
            max_steps: Stored unchanged as ``max_steps``.
            fa_spec: Specification used to create ``vf_fa`` (via
                ``get_vf_func_approx_obj``) and ``qvf_fa`` (via
                ``get_qvf_func_approx_obj``).
        """
        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.softmax: bool = softmax

        # Only the derived callable is kept; the raw epsilon inputs are not
        # stored on the instance.
        decay_schedule = get_epsilon_decay_func(
            epsilon,
            epsilon_half_life,
        )
        self.epsilon_func: Callable[[int], float] = decay_schedule

        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps

        # Built in this order (vf first, then qvf) in case construction has
        # order-dependent side effects that are not visible from here.
        value_approx = fa_spec.get_vf_func_approx_obj()
        self.vf_fa: FuncApproxBase = value_approx
        q_value_approx = fa_spec.get_qvf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = q_value_approx

        # Same object that ``self.mdp_rep`` refers to, so this is the same
        # attribute lookup as going through the instance.
        self.state_action_func = mdp_rep_for_rl.state_action_func
Beispiel #4
0
 def __init__(
     self,
     mab: MabEnv,
     time_steps: int,
     num_episodes: int,
     epsilon: float,
     epsilon_half_life: float = 1e8,
     count_init: int = 0,
     mean_init: float = 0.,
 ) -> None:
     """Initialise the base class, then record the epsilon and init settings.

     Args:
         mab: Forwarded to the parent constructor as ``mab``.
         time_steps: Forwarded to the parent constructor as ``time_steps``.
         num_episodes: Forwarded to the parent constructor as
             ``num_episodes``.
         epsilon: First argument passed to ``get_epsilon_decay_func``.
         epsilon_half_life: Second argument passed to
             ``get_epsilon_decay_func``; defaults to ``1e8``.
         count_init: Stored unchanged as ``count_init``; defaults to ``0``.
         mean_init: Stored unchanged as ``mean_init``; defaults to ``0.``.
     """
     # The parent constructor runs first, before any subclass attribute
     # is assigned.
     super().__init__(
         mab=mab,
         time_steps=time_steps,
         num_episodes=num_episodes,
     )

     # Only the derived callable is kept; the raw epsilon inputs are not
     # stored on the instance.
     decay_schedule = get_epsilon_decay_func(
         epsilon,
         epsilon_half_life,
     )
     self.epsilon_func: Callable[[int], float] = decay_schedule

     self.count_init: int = count_init
     self.mean_init: float = mean_init