def __init__(
    self,
    mdp_rep_for_adp_pg: MDPRepForADPPG,
    reinforce: bool,
    num_state_samples: int,
    num_next_state_samples: int,
    num_action_samples: int,
    num_batches: int,
    max_steps: int,
    actor_lambda: float,
    critic_lambda: float,
    score_func: Callable[[A, Sequence[float]], Sequence[float]],
    sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
    vf_fa_spec: FuncApproxSpec,
    pol_fa_spec: Sequence[FuncApproxSpec],
) -> None:
    """Record the run configuration and build the function approximators.

    Every argument other than the two ``*_fa_spec`` ones is stored
    unchanged on the instance. ``mdp_rep_for_adp_pg`` is stored as
    ``mdp_rep``; all the others keep their parameter name.

    Args:
        mdp_rep_for_adp_pg: MDP representation, stored as ``mdp_rep``.
        reinforce: Boolean switch, stored as-is (presumably selects the
            REINFORCE variant of the algorithm; confirm against the
            methods that read it).
        num_state_samples: Integer setting, stored as-is.
        num_next_state_samples: Integer setting, stored as-is.
        num_action_samples: Integer setting, stored as-is.
        num_batches: Integer setting, stored as-is.
        max_steps: Integer setting, stored as-is.
        actor_lambda: Float setting, stored as-is.
        critic_lambda: Float setting, stored as-is.
        score_func: Callable annotated as taking an action and a
            sequence of floats and returning a sequence of floats.
        sample_actions_gen_func: Callable annotated as taking a
            sequence of floats and an int and returning a sequence of
            actions.
        vf_fa_spec: Spec whose ``get_vf_func_approx_obj()`` result is
            stored as ``vf_fa``.
        pol_fa_spec: Specs from which ``pol_fa`` is built, one
            approximator per spec, in the given order.
    """
    # Plain configuration, copied verbatim from the arguments.
    self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
    self.reinforce: bool = reinforce
    self.num_state_samples: int = num_state_samples
    self.num_next_state_samples: int = num_next_state_samples
    self.num_action_samples: int = num_action_samples
    self.num_batches: int = num_batches
    self.max_steps: int = max_steps
    self.actor_lambda: float = actor_lambda
    self.critic_lambda: float = critic_lambda

    # User-supplied callables.
    self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
        score_func
    )
    self.sample_actions_gen_func: Callable[
        [Sequence[float], int], Sequence[A]
    ] = sample_actions_gen_func

    # Function approximators. The value-function one is built first,
    # then the policy ones.
    self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
    # The policy approximators also come from get_vf_func_approx_obj(),
    # the same builder that is used for the value function.
    self.pol_fa: Sequence[FuncApproxBase] = [
        spec.get_vf_func_approx_obj() for spec in pol_fa_spec
    ]
def __init__(
    self,
    mdp_rep_for_adp: MDPRepForADP,
    num_samples: int,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    tol: float,
    fa_spec: FuncApproxSpec,
) -> None:
    """Record the settings and build the epsilon schedule and approximator.

    Args:
        mdp_rep_for_adp: MDP representation, stored as ``mdp_rep``. Its
            ``state_action_func`` attribute is also exposed directly on
            the instance.
        num_samples: Integer setting, stored as-is.
        softmax: Boolean switch, stored as-is.
        epsilon: Passed, together with ``epsilon_half_life``, to
            ``get_epsilon_decay_func``. Only the resulting callable is
            kept, as ``epsilon_func``.
        epsilon_half_life: See ``epsilon``.
        tol: Float setting, stored as-is (presumably a convergence
            tolerance; confirm against the methods that read it).
        fa_spec: Spec whose ``get_vf_func_approx_obj()`` result is
            stored as ``fa``.
    """
    self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
    self.num_samples: int = num_samples
    self.softmax: bool = softmax

    # The raw epsilon and half-life are not retained; only the schedule
    # derived from them is.
    decay_schedule = get_epsilon_decay_func(epsilon, epsilon_half_life)
    self.epsilon_func: Callable[[int], float] = decay_schedule

    self.tol: float = tol
    self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()

    # Shortcut to the MDP representation's state -> actions lookup.
    self.state_action_func: Callable[[S], Set[A]] = (
        self.mdp_rep.state_action_func
    )
def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLFA,
    softmax: bool,
    epsilon: float,
    num_episodes: int,
    max_steps: int,
    fa_spec: FuncApproxSpec,
) -> None:
    """Record the settings and build both function approximators.

    Args:
        mdp_rep_for_rl: MDP representation, stored as ``mdp_rep``. Its
            ``state_action_func`` attribute is also exposed directly on
            the instance.
        softmax: Boolean switch, stored as-is.
        epsilon: Float setting, stored as-is (no decay schedule is
            built here).
        num_episodes: Integer setting, stored as-is.
        max_steps: Integer setting, stored as-is.
        fa_spec: Spec used to build two approximators:
            ``get_vf_func_approx_obj()`` is stored as ``vf_fa`` and
            ``get_qvf_func_approx_obj()`` as ``qvf_fa``.
    """
    # Plain configuration, copied verbatim from the arguments.
    self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
    self.softmax: bool = softmax
    self.epsilon: float = epsilon
    self.num_episodes: int = num_episodes
    self.max_steps: int = max_steps

    # Both approximators come from the same spec. The value-function
    # one is built before the Q-value-function one.
    value_approx = fa_spec.get_vf_func_approx_obj()
    self.vf_fa: FuncApproxBase = value_approx
    q_value_approx = fa_spec.get_qvf_func_approx_obj()
    self.qvf_fa: FuncApproxBase = q_value_approx

    # Shortcut to the MDP representation's state_action_func attribute.
    self.state_action_func = self.mdp_rep.state_action_func
def __init__(
    self,
    mdp_rep_for_adp_pg: MDPRepForADPPG,
    num_state_samples: int,
    num_action_samples: int,
    tol: float,
    score_func: Callable[[A, Sequence[float]], Sequence[float]],
    sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
    vf_fa_spec: FuncApproxSpec,
    pol_fa_spec: Sequence[FuncApproxSpec],
) -> None:
    """Record the settings and build the value and policy approximators.

    Args:
        mdp_rep_for_adp_pg: MDP representation, stored as ``mdp_rep``.
        num_state_samples: Integer setting, stored as-is.
        num_action_samples: Integer setting, stored as-is.
        tol: Float setting, stored as-is (presumably a convergence
            tolerance; confirm against the methods that read it).
        score_func: Callable annotated as taking an action and a
            sequence of floats and returning a sequence of floats.
        sample_actions_gen_func: Callable annotated as taking a
            sequence of floats and an int and returning a sequence of
            actions.
        vf_fa_spec: Spec whose ``get_vf_func_approx_obj()`` result is
            stored as ``vf_fa``.
        pol_fa_spec: Specs from which ``pol_fa`` is built, one
            approximator per spec, in the given order.
    """
    # Plain configuration, copied verbatim from the arguments.
    self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
    self.num_state_samples: int = num_state_samples
    self.num_action_samples: int = num_action_samples
    self.tol: float = tol

    # User-supplied callables.
    self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
        score_func
    )
    self.sample_actions_gen_func: Callable[
        [Sequence[float], int], Sequence[A]
    ] = sample_actions_gen_func

    # Function approximators. The value-function one is built first,
    # then the policy ones.
    self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
    # The policy approximators also come from get_vf_func_approx_obj(),
    # the same builder that is used for the value function.
    self.pol_fa: Sequence[FuncApproxBase] = [
        spec.get_vf_func_approx_obj() for spec in pol_fa_spec
    ]
def __init__(
    self,
    mdp_rep_for_rl_pg: MDPRepForRLPG,
    batch_size: int,
    num_batches: int,
    num_action_samples: int,
    max_steps: int,
    actor_lambda: float,
    critic_lambda: float,
    score_func: Callable[[A, Sequence[float]], Sequence[float]],
    sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
    fa_spec: FuncApproxSpec,
    pol_fa_spec: Sequence[FuncApproxSpec],
) -> None:
    """Record the run configuration and build all function approximators.

    Args:
        mdp_rep_for_rl_pg: MDP representation, stored as ``mdp_rep``.
        batch_size: Integer setting, stored as-is.
        num_batches: Integer setting, stored as-is.
        num_action_samples: Integer setting, stored as-is.
        max_steps: Integer setting, stored as-is.
        actor_lambda: Float setting, stored as-is.
        critic_lambda: Float setting, stored as-is.
        score_func: Callable annotated as taking an action and a
            sequence of floats and returning a sequence of floats.
        sample_actions_gen_func: Callable annotated as taking a
            sequence of floats and an int and returning a sequence of
            actions.
        fa_spec: Spec used to build two approximators:
            ``get_vf_func_approx_obj()`` is stored as ``vf_fa`` and
            ``get_qvf_func_approx_obj()`` as ``qvf_fa``.
        pol_fa_spec: Specs from which ``pol_fa`` is built, one
            approximator per spec, in the given order.
    """
    # Plain configuration, copied verbatim from the arguments.
    self.mdp_rep: MDPRepForRLPG = mdp_rep_for_rl_pg
    self.batch_size: int = batch_size
    self.num_batches: int = num_batches
    self.num_action_samples: int = num_action_samples
    self.max_steps: int = max_steps
    self.actor_lambda: float = actor_lambda
    self.critic_lambda: float = critic_lambda

    # User-supplied callables.
    self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
        score_func
    )
    self.sample_actions_gen_func: Callable[
        [Sequence[float], int], Sequence[A]
    ] = sample_actions_gen_func

    # Function approximators, built in this order: value function,
    # Q-value function, then the policy ones.
    self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
    self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
    # The policy approximators also come from get_vf_func_approx_obj(),
    # the same builder that is used for the value function.
    self.pol_fa: Sequence[FuncApproxBase] = [
        spec.get_vf_func_approx_obj() for spec in pol_fa_spec
    ]
def __init__(
    self,
    mdp_rep_for_rl: MDPRepForRLFA,
    exploring_start: bool,
    softmax: bool,
    epsilon: float,
    epsilon_half_life: float,
    num_episodes: int,
    max_steps: int,
    fa_spec: FuncApproxSpec,
) -> None:
    """Record the settings and build the epsilon schedule and approximators.

    Args:
        mdp_rep_for_rl: MDP representation, stored as ``mdp_rep``. Its
            ``state_action_func`` attribute is also exposed directly on
            the instance.
        exploring_start: Boolean switch, stored as-is.
        softmax: Boolean switch, stored as-is.
        epsilon: Passed, together with ``epsilon_half_life``, to
            ``get_epsilon_decay_func``. Only the resulting callable is
            kept, as ``epsilon_func``.
        epsilon_half_life: See ``epsilon``.
        num_episodes: Integer setting, stored as-is.
        max_steps: Integer setting, stored as-is.
        fa_spec: Spec used to build two approximators:
            ``get_vf_func_approx_obj()`` is stored as ``vf_fa`` and
            ``get_qvf_func_approx_obj()`` as ``qvf_fa``.
    """
    self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
    self.exploring_start: bool = exploring_start
    self.softmax: bool = softmax

    # The raw epsilon and half-life are not retained; only the schedule
    # derived from them is.
    decay_schedule = get_epsilon_decay_func(epsilon, epsilon_half_life)
    self.epsilon_func: Callable[[int], float] = decay_schedule

    self.num_episodes: int = num_episodes
    self.max_steps: int = max_steps

    # Both approximators come from the same spec. The value-function
    # one is built before the Q-value-function one.
    self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
    self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()

    # Shortcut to the MDP representation's state_action_func attribute.
    self.state_action_func = self.mdp_rep.state_action_func