예제 #1
0
 def __init__(
     self,
     mdp_rep_for_adp_pg: MDPRepForADPPG,
     reinforce: bool,
     num_state_samples: int,
     num_next_state_samples: int,
     num_action_samples: int,
     num_batches: int,
     max_steps: int,
     actor_lambda: float,
     critic_lambda: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int],
                                       Sequence[A]],
     vf_fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec],
 ) -> None:
     """Store the run settings and build the function approximators.

     ``mdp_rep_for_adp_pg`` is kept as ``self.mdp_rep``; the flag, the
     sample/batch/step counts, the two lambdas and the two callables are
     kept unchanged under their own names.

     ``vf_fa_spec`` yields ``self.vf_fa`` and every entry of
     ``pol_fa_spec`` yields one element of ``self.pol_fa``; both are
     obtained through the spec's ``get_vf_func_approx_obj()``.
     """
     # MDP representation and algorithm switch.
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
     self.reinforce: bool = reinforce

     # Sample counts and iteration limits.
     self.num_state_samples: int = num_state_samples
     self.num_next_state_samples: int = num_next_state_samples
     self.num_action_samples: int = num_action_samples
     self.num_batches: int = num_batches
     self.max_steps: int = max_steps

     # Lambda values for the actor and the critic.
     self.actor_lambda: float = actor_lambda
     self.critic_lambda: float = critic_lambda

     # Caller-supplied policy callables, stored as given.
     self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
         score_func
     )
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Value-function approximator first, then one approximator per
     # policy spec (same creation order as the specs are listed).
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     policy_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = policy_approximators
예제 #2
0
 def __init__(
     self,
     mdp_rep_for_adp: MDPRepForADP,
     num_samples: int,
     softmax: bool,
     epsilon: float,
     epsilon_half_life: float,
     tol: float,
     fa_spec: FuncApproxSpec,
 ) -> None:
     """Store the settings and build the function approximator.

     ``mdp_rep_for_adp`` is kept as ``self.mdp_rep`` and its
     ``state_action_func`` attribute is also exposed directly as
     ``self.state_action_func``.

     ``epsilon`` and ``epsilon_half_life`` are not stored themselves;
     they are handed to ``get_epsilon_decay_func`` and the returned
     callable is kept as ``self.epsilon_func``.

     ``self.fa`` comes from ``fa_spec.get_vf_func_approx_obj()``.
     """
     self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
     self.num_samples: int = num_samples
     self.softmax: bool = softmax

     # Only the derived schedule is retained, not the raw epsilon inputs.
     decay_schedule = get_epsilon_decay_func(epsilon, epsilon_half_life)
     self.epsilon_func: Callable[[int], float] = decay_schedule

     self.tol: float = tol
     self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()

     # Shortcut to the MDP representation's state -> actions lookup.
     self.state_action_func: Callable[[S], Set[A]] = (
         self.mdp_rep.state_action_func
     )
예제 #3
0
    def __init__(
        self,
        mdp_rep_for_rl: MDPRepForRLFA,
        softmax: bool,
        epsilon: float,
        num_episodes: int,
        max_steps: int,
        fa_spec: FuncApproxSpec,
    ) -> None:
        """Store the settings and build both function approximators.

        ``mdp_rep_for_rl`` is kept as ``self.mdp_rep`` and its
        ``state_action_func`` attribute is also exposed directly as
        ``self.state_action_func``.  ``softmax``, ``epsilon``,
        ``num_episodes`` and ``max_steps`` are stored unchanged.

        The single ``fa_spec`` supplies two objects:
        ``get_vf_func_approx_obj()`` for ``self.vf_fa`` and
        ``get_qvf_func_approx_obj()`` for ``self.qvf_fa``.
        """
        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl

        # Plain settings, copied as given.
        self.softmax: bool = softmax
        self.epsilon: float = epsilon
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps

        # Both approximators are derived from the same spec.
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()

        # Shortcut to the MDP representation's state_action_func.
        self.state_action_func = self.mdp_rep.state_action_func
예제 #4
0
 def __init__(
     self,
     mdp_rep_for_adp_pg: MDPRepForADPPG,
     num_state_samples: int,
     num_action_samples: int,
     tol: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int],
                                       Sequence[A]],
     vf_fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec],
 ) -> None:
     """Store the settings and build the function approximators.

     ``mdp_rep_for_adp_pg`` is kept as ``self.mdp_rep``; the two sample
     counts, ``tol`` and the two callables are kept unchanged under
     their own names.

     ``vf_fa_spec`` yields ``self.vf_fa`` and every entry of
     ``pol_fa_spec`` yields one element of ``self.pol_fa``; both are
     obtained through the spec's ``get_vf_func_approx_obj()``.
     """
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg

     # Sample counts and tolerance.
     self.num_state_samples: int = num_state_samples
     self.num_action_samples: int = num_action_samples
     self.tol: float = tol

     # Caller-supplied policy callables, stored as given.
     self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
         score_func
     )
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Value-function approximator first, then one per policy spec.
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     policy_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = policy_approximators
예제 #5
0
 def __init__(
     self,
     mdp_rep_for_rl_pg: MDPRepForRLPG,
     batch_size: int,
     num_batches: int,
     num_action_samples: int,
     max_steps: int,
     actor_lambda: float,
     critic_lambda: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int],
                                       Sequence[A]],
     fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec],
 ) -> None:
     """Store the run settings and build the function approximators.

     ``mdp_rep_for_rl_pg`` is kept as ``self.mdp_rep``; the batch,
     sample and step counts, the two lambdas and the two callables are
     kept unchanged under their own names.

     ``fa_spec`` supplies ``self.vf_fa`` (``get_vf_func_approx_obj()``)
     and ``self.qvf_fa`` (``get_qvf_func_approx_obj()``).  Every entry
     of ``pol_fa_spec`` yields one element of ``self.pol_fa`` through
     ``get_vf_func_approx_obj()``.
     """
     self.mdp_rep: MDPRepForRLPG = mdp_rep_for_rl_pg

     # Batch, sample and step limits.
     self.batch_size: int = batch_size
     self.num_batches: int = num_batches
     self.num_action_samples: int = num_action_samples
     self.max_steps: int = max_steps

     # Lambda values for the actor and the critic.
     self.actor_lambda: float = actor_lambda
     self.critic_lambda: float = critic_lambda

     # Caller-supplied policy callables, stored as given.
     self.score_func: Callable[[A, Sequence[float]], Sequence[float]] = (
         score_func
     )
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Approximators: V and Q from the shared spec, then one per
     # policy spec.
     self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
     self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
     policy_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = policy_approximators
    def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, exploring_start: bool,
                 softmax: bool, epsilon: float, epsilon_half_life: float,
                 num_episodes: int, max_steps: int,
                 fa_spec: FuncApproxSpec) -> None:
        """Store the settings and build both function approximators.

        ``mdp_rep_for_rl`` is kept as ``self.mdp_rep`` and its
        ``state_action_func`` attribute is also exposed directly as
        ``self.state_action_func``.

        ``epsilon`` and ``epsilon_half_life`` are not stored themselves;
        they are handed to ``get_epsilon_decay_func`` and the returned
        callable is kept as ``self.epsilon_func``.

        The single ``fa_spec`` supplies two objects:
        ``get_vf_func_approx_obj()`` for ``self.vf_fa`` and
        ``get_qvf_func_approx_obj()`` for ``self.qvf_fa``.
        """
        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.exploring_start: bool = exploring_start
        self.softmax: bool = softmax

        # Only the derived schedule is retained, not the raw epsilon inputs.
        decay_schedule = get_epsilon_decay_func(epsilon, epsilon_half_life)
        self.epsilon_func: Callable[[int], float] = decay_schedule

        # Episode and step limits.
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps

        # Both approximators are derived from the same spec.
        self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()

        # Shortcut to the MDP representation's state_action_func.
        self.state_action_func = self.mdp_rep.state_action_func