Esempio n. 1
0
    def __init__(self, mdp_rep_for_rl: MDPRepForRLTabular, first_visit: bool,
                 softmax: bool, epsilon: float, epsilon_half_life: float,
                 num_episodes: int, max_steps: int) -> None:
        """Initialise the base class, then store the visit flag and step count.

        Args:
            mdp_rep_for_rl: MDP representation handed to the base
                initialiser; its ``gamma`` attribute is also read here.
            first_visit: Stored as ``self.first_visit``.
                NOTE(review): presumably selects first-visit vs. every-visit
                return averaging -- confirm against the code that reads it.
            softmax: Forwarded unchanged to the base initialiser.
            epsilon: Forwarded unchanged to the base initialiser.
            epsilon_half_life: Forwarded unchanged to the base initialiser.
            num_episodes: Forwarded unchanged to the base initialiser.
            max_steps: Forwarded to the base initialiser and also passed to
                ``get_nt_return_eval_steps``.
        """
        # The base initialiser runs first, exactly as before, so any state it
        # sets up exists before the attributes below are assigned.
        super().__init__(mdp_rep_for_rl=mdp_rep_for_rl, softmax=softmax,
                         epsilon=epsilon, epsilon_half_life=epsilon_half_life,
                         num_episodes=num_episodes, max_steps=max_steps)

        self.first_visit: bool = first_visit

        # Third argument given to get_nt_return_eval_steps.
        # NOTE(review): looks like a tolerance on the discounted tail of the
        # return -- confirm against the helper's definition.
        tolerance = 1e-4
        discount = mdp_rep_for_rl.gamma
        eval_steps = get_nt_return_eval_steps(max_steps, discount, tolerance)
        self.nt_return_eval_steps = eval_steps
Esempio n. 2
0
    def __init__(
        self,
        mdp_rep_for_rl: MDPRepForRLFA,
        first_visit: bool,
        softmax: bool,
        epsilon: float,
        num_episodes: int,
        max_steps: int,
        fa_spec: FuncApproxSpec
    ) -> None:
        """Initialise the base class, then store the visit flag and step count.

        Args:
            mdp_rep_for_rl: MDP representation handed to the base
                initialiser; its ``gamma`` attribute is also read here.
            first_visit: Stored as ``self.first_visit``.
                NOTE(review): presumably selects first-visit vs. every-visit
                return averaging -- confirm against the code that reads it.
            softmax: Forwarded unchanged to the base initialiser.
            epsilon: Forwarded unchanged to the base initialiser.
            num_episodes: Forwarded unchanged to the base initialiser.
            max_steps: Forwarded to the base initialiser and also passed to
                ``get_nt_return_eval_steps``.
            fa_spec: Forwarded unchanged to the base initialiser.
        """
        # Base-class setup comes first; the assignments below follow it in
        # the same order as the original implementation.
        super().__init__(
            mdp_rep_for_rl=mdp_rep_for_rl,
            softmax=softmax,
            epsilon=epsilon,
            num_episodes=num_episodes,
            max_steps=max_steps,
            fa_spec=fa_spec
        )
        self.first_visit: bool = first_visit

        # Third argument given to get_nt_return_eval_steps.
        # NOTE(review): looks like a tolerance on the discounted tail of the
        # return -- confirm against the helper's definition.
        tolerance = 1e-4
        discount = mdp_rep_for_rl.gamma
        self.nt_return_eval_steps = get_nt_return_eval_steps(
            max_steps,
            discount,
            tolerance
        )
Esempio n. 3
0
    def __init__(
        self,
        mdp_rep_for_rl: MDPRepForRLFA,
        exploring_start: bool,
        softmax: bool,
        epsilon: float,
        epsilon_half_life: float,
        num_episodes: int,
        max_steps: int,
        fa_spec: FuncApproxSpec
    ) -> None:
        """Initialise the base class, then compute the evaluation step count.

        Args:
            mdp_rep_for_rl: MDP representation handed to the base
                initialiser; its ``gamma`` attribute is also read here.
            exploring_start: Forwarded unchanged to the base initialiser.
            softmax: Forwarded unchanged to the base initialiser.
            epsilon: Forwarded unchanged to the base initialiser.
            epsilon_half_life: Forwarded unchanged to the base initialiser.
            num_episodes: Forwarded unchanged to the base initialiser.
            max_steps: Forwarded to the base initialiser and also passed to
                ``get_nt_return_eval_steps``.
            fa_spec: Forwarded unchanged to the base initialiser.
        """
        # Every constructor argument is passed to the base class by keyword.
        super().__init__(
            mdp_rep_for_rl=mdp_rep_for_rl,
            exploring_start=exploring_start,
            softmax=softmax,
            epsilon=epsilon,
            epsilon_half_life=epsilon_half_life,
            num_episodes=num_episodes,
            max_steps=max_steps,
            fa_spec=fa_spec
        )

        # Third argument given to get_nt_return_eval_steps.
        # NOTE(review): looks like a tolerance on the discounted tail of the
        # return -- confirm against the helper's definition.
        tolerance = 1e-4
        discount = mdp_rep_for_rl.gamma
        self.nt_return_eval_steps = get_nt_return_eval_steps(
            max_steps,
            discount,
            tolerance
        )