def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLTabular,
    first_visit: bool,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    num_episodes: int,
    max_steps: int
) -> None:
    """Set up the tabular variant of this algorithm.

    All arguments except ``first_visit`` are handed to the parent
    initializer by keyword; this constructor adds two attributes of its
    own on top of whatever the parent sets.

    Args:
        mdp_rep_for_rl: Tabular MDP representation. Forwarded to the
            parent; its ``gamma`` attribute is also read here.
        first_visit: Stored as ``self.first_visit``.
            NOTE(review): presumably selects first-visit vs. every-visit
            return accumulation -- confirm where it is consumed.
        softmax: Forwarded to the parent initializer.
        epsilon: Forwarded to the parent initializer.
        epsilon_half_life: Forwarded to the parent initializer.
        num_episodes: Forwarded to the parent initializer.
        max_steps: Forwarded to the parent initializer and also passed
            to ``get_nt_return_eval_steps``.
    """
    super().__init__(
        mdp_rep_for_rl=mdp_rep_for_rl,
        softmax=softmax,
        epsilon=epsilon,
        epsilon_half_life=epsilon_half_life,
        num_episodes=num_episodes,
        max_steps=max_steps
    )

    # Derived from the step cap, the MDP's gamma and a fixed 1e-4 constant.
    # NOTE(review): the 1e-4 looks like a tolerance on the discounted
    # return -- confirm against get_nt_return_eval_steps.
    discount = mdp_rep_for_rl.gamma
    self.nt_return_eval_steps = get_nt_return_eval_steps(
        max_steps,
        discount,
        1e-4
    )
    self.first_visit: bool = first_visit
def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLFA,
    first_visit: bool,
    softmax: bool,
    epsilon: float,
    num_episodes: int,
    max_steps: int,
    fa_spec: FuncApproxSpec
) -> None:
    """Set up the function-approximation variant of this algorithm.

    All arguments except ``first_visit`` are handed to the parent
    initializer by keyword; this constructor adds two attributes of its
    own on top of whatever the parent sets.

    Args:
        mdp_rep_for_rl: MDP representation for function approximation.
            Forwarded to the parent; its ``gamma`` attribute is also
            read here.
        first_visit: Stored as ``self.first_visit``.
            NOTE(review): presumably selects first-visit vs. every-visit
            return accumulation -- confirm where it is consumed.
        softmax: Forwarded to the parent initializer.
        epsilon: Forwarded to the parent initializer.
        num_episodes: Forwarded to the parent initializer.
        max_steps: Forwarded to the parent initializer and also passed
            to ``get_nt_return_eval_steps``.
        fa_spec: Forwarded to the parent initializer.
    """
    super().__init__(
        mdp_rep_for_rl=mdp_rep_for_rl,
        softmax=softmax,
        epsilon=epsilon,
        num_episodes=num_episodes,
        max_steps=max_steps,
        fa_spec=fa_spec
    )

    # Derived from the step cap, the MDP's gamma and a fixed 1e-4 constant.
    # NOTE(review): the 1e-4 looks like a tolerance on the discounted
    # return -- confirm against get_nt_return_eval_steps.
    discount = mdp_rep_for_rl.gamma
    self.nt_return_eval_steps = get_nt_return_eval_steps(
        max_steps,
        discount,
        1e-4
    )
    self.first_visit: bool = first_visit
def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLFA,
    exploring_start: bool,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    num_episodes: int,
    max_steps: int,
    fa_spec: FuncApproxSpec
) -> None:
    """Set up the function-approximation variant of this algorithm.

    Every argument is handed to the parent initializer by keyword; the
    only attribute this constructor adds itself is
    ``self.nt_return_eval_steps``.

    Args:
        mdp_rep_for_rl: MDP representation for function approximation.
            Forwarded to the parent; its ``gamma`` attribute is also
            read here.
        exploring_start: Forwarded to the parent initializer.
        softmax: Forwarded to the parent initializer.
        epsilon: Forwarded to the parent initializer.
        epsilon_half_life: Forwarded to the parent initializer.
        num_episodes: Forwarded to the parent initializer.
        max_steps: Forwarded to the parent initializer and also passed
            to ``get_nt_return_eval_steps``.
        fa_spec: Forwarded to the parent initializer.
    """
    super().__init__(
        mdp_rep_for_rl=mdp_rep_for_rl,
        exploring_start=exploring_start,
        softmax=softmax,
        epsilon=epsilon,
        epsilon_half_life=epsilon_half_life,
        num_episodes=num_episodes,
        max_steps=max_steps,
        fa_spec=fa_spec
    )

    # Derived from the step cap, the MDP's gamma and a fixed 1e-4 constant.
    # NOTE(review): the 1e-4 looks like a tolerance on the discounted
    # return -- confirm against get_nt_return_eval_steps.
    discount = mdp_rep_for_rl.gamma
    self.nt_return_eval_steps = get_nt_return_eval_steps(
        max_steps,
        discount,
        1e-4
    )