Python FuncApproxSpec.get_vf_func_approx_obj 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: algorithms.func_approx_spec

클래스/타입: FuncApproxSpec

메소드/함수: get_vf_func_approx_obj

hotexamples.com에서의 예제들: 6

Python FuncApproxSpec.get_vf_func_approx_obj - 6개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 algorithms.func_approx_spec.FuncApproxSpec.get_vf_func_approx_obj를 실제로 사용한 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FuncApproxSpec(21)

get_vf_func_approx_obj(6)

get_qvf_func_approx_obj(3)

예제 #1

파일 보기

파일: adp_pg.py 프로젝트: zhongyunuestc/MDP-DP-RL

 def __init__(
     self,
     mdp_rep_for_adp_pg: MDPRepForADPPG,
     reinforce: bool,
     num_state_samples: int,
     num_next_state_samples: int,
     num_action_samples: int,
     num_batches: int,
     max_steps: int,
     actor_lambda: float,
     critic_lambda: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
     vf_fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec]
 ) -> None:
     """Record the settings and build the function approximators.

     Every argument other than the two specs is stored unchanged on the
     instance under its own name, except ``mdp_rep_for_adp_pg``, which is
     stored as ``self.mdp_rep``.

     Args:
         vf_fa_spec: Spec whose ``get_vf_func_approx_obj()`` result
             becomes ``self.vf_fa``.
         pol_fa_spec: Specs; ``get_vf_func_approx_obj()`` is called on
             each one in order and the results are kept as the list
             ``self.pol_fa``.
     """
     # Plain settings, copied over as given.
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
     self.reinforce: bool = reinforce
     self.num_state_samples: int = num_state_samples
     self.num_next_state_samples: int = num_next_state_samples
     self.num_action_samples: int = num_action_samples
     self.num_batches: int = num_batches
     self.max_steps: int = max_steps
     self.actor_lambda: float = actor_lambda
     self.critic_lambda: float = critic_lambda

     # Caller-supplied callables.
     self.score_func: Callable[
         [A, Sequence[float]], Sequence[float]
     ] = score_func
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Approximators are created from the specs: first the single
     # vf_fa_spec, then one per entry of pol_fa_spec.
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     pol_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = pol_approximators

예제 #2

파일 보기

 def __init__(
     self,
     mdp_rep_for_adp: MDPRepForADP,
     num_samples: int,
     softmax: bool,
     epsilon: float,
     epsilon_half_life: float,
     tol: float,
     fa_spec: FuncApproxSpec
 ) -> None:
     """Record the settings and build the function approximator.

     Args:
         mdp_rep_for_adp: Stored as ``self.mdp_rep``; its
             ``state_action_func`` attribute is also exposed as
             ``self.state_action_func``.
         num_samples: Stored unchanged.
         softmax: Stored unchanged.
         epsilon: Passed, with ``epsilon_half_life``, to
             ``get_epsilon_decay_func``; the returned callable is
             stored as ``self.epsilon_func``.
         epsilon_half_life: See ``epsilon``.
         tol: Stored unchanged.
         fa_spec: Spec whose ``get_vf_func_approx_obj()`` result
             becomes ``self.fa``.
     """
     self.mdp_rep: MDPRepForADP = mdp_rep_for_adp
     self.num_samples: int = num_samples
     self.softmax: bool = softmax

     # epsilon / epsilon_half_life are not kept themselves; only the
     # callable built from them is.
     decay_func = get_epsilon_decay_func(epsilon, epsilon_half_life)
     self.epsilon_func: Callable[[int], float] = decay_func

     self.tol: float = tol
     self.fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()

     # Shortcut to the MDP representation's state -> actions callable.
     self.state_action_func: Callable[
         [S], Set[A]
     ] = self.mdp_rep.state_action_func

예제 #3

파일 보기

    def __init__(
        self,
        mdp_rep_for_rl: MDPRepForRLFA,
        softmax: bool,
        epsilon: float,
        num_episodes: int,
        max_steps: int,
        fa_spec: FuncApproxSpec
    ) -> None:
        """Record the settings and build both function approximators.

        Args:
            mdp_rep_for_rl: Stored as ``self.mdp_rep``; its
                ``state_action_func`` attribute is also exposed as
                ``self.state_action_func``.
            softmax: Stored unchanged.
            epsilon: Stored unchanged.
            num_episodes: Stored unchanged.
            max_steps: Stored unchanged.
            fa_spec: Spec used twice: ``get_vf_func_approx_obj()``
                yields ``self.vf_fa`` and ``get_qvf_func_approx_obj()``
                yields ``self.qvf_fa``.
        """
        # Plain settings, copied over as given.
        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.softmax: bool = softmax
        self.epsilon: float = epsilon
        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps

        # Two approximators come from the same spec, created in this order.
        value_fa = fa_spec.get_vf_func_approx_obj()
        self.vf_fa: FuncApproxBase = value_fa
        q_value_fa = fa_spec.get_qvf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = q_value_fa

        # Shortcut to the MDP representation's state_action_func.
        self.state_action_func = self.mdp_rep.state_action_func

예제 #4

파일 보기

 def __init__(
     self,
     mdp_rep_for_adp_pg: MDPRepForADPPG,
     num_state_samples: int,
     num_action_samples: int,
     tol: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
     vf_fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec]
 ) -> None:
     """Record the settings and build the function approximators.

     Every argument other than the two specs is stored unchanged on the
     instance under its own name, except ``mdp_rep_for_adp_pg``, which is
     stored as ``self.mdp_rep``.

     Args:
         vf_fa_spec: Spec whose ``get_vf_func_approx_obj()`` result
             becomes ``self.vf_fa``.
         pol_fa_spec: Specs; ``get_vf_func_approx_obj()`` is called on
             each one in order and the results are kept as the list
             ``self.pol_fa``.
     """
     # Plain settings, copied over as given.
     self.mdp_rep: MDPRepForADPPG = mdp_rep_for_adp_pg
     self.num_state_samples: int = num_state_samples
     self.num_action_samples: int = num_action_samples
     self.tol: float = tol

     # Caller-supplied callables.
     self.score_func: Callable[
         [A, Sequence[float]], Sequence[float]
     ] = score_func
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Approximators are created from the specs: first the single
     # vf_fa_spec, then one per entry of pol_fa_spec.
     self.vf_fa: FuncApproxBase = vf_fa_spec.get_vf_func_approx_obj()
     pol_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = pol_approximators

예제 #5

파일 보기

파일: pg.py 프로젝트: Shilpibhargava/MDP-DP-RL

 def __init__(
     self,
     mdp_rep_for_rl_pg: MDPRepForRLPG,
     batch_size: int,
     num_batches: int,
     num_action_samples: int,
     max_steps: int,
     actor_lambda: float,
     critic_lambda: float,
     score_func: Callable[[A, Sequence[float]], Sequence[float]],
     sample_actions_gen_func: Callable[[Sequence[float], int], Sequence[A]],
     fa_spec: FuncApproxSpec,
     pol_fa_spec: Sequence[FuncApproxSpec]
 ) -> None:
     """Record the settings and build the function approximators.

     Every argument other than the two specs is stored unchanged on the
     instance under its own name, except ``mdp_rep_for_rl_pg``, which is
     stored as ``self.mdp_rep``.

     Args:
         fa_spec: Spec used twice: ``get_vf_func_approx_obj()`` yields
             ``self.vf_fa`` and ``get_qvf_func_approx_obj()`` yields
             ``self.qvf_fa``.
         pol_fa_spec: Specs; ``get_vf_func_approx_obj()`` is called on
             each one in order and the results are kept as the list
             ``self.pol_fa``.
     """
     # Plain settings, copied over as given.
     self.mdp_rep: MDPRepForRLPG = mdp_rep_for_rl_pg
     self.batch_size: int = batch_size
     self.num_batches: int = num_batches
     self.num_action_samples: int = num_action_samples
     self.max_steps: int = max_steps
     self.actor_lambda: float = actor_lambda
     self.critic_lambda: float = critic_lambda

     # Caller-supplied callables.
     self.score_func: Callable[
         [A, Sequence[float]], Sequence[float]
     ] = score_func
     self.sample_actions_gen_func: Callable[
         [Sequence[float], int], Sequence[A]
     ] = sample_actions_gen_func

     # Approximators, created in this order: the two from fa_spec,
     # then one per entry of pol_fa_spec.
     self.vf_fa: FuncApproxBase = fa_spec.get_vf_func_approx_obj()
     self.qvf_fa: FuncApproxBase = fa_spec.get_qvf_func_approx_obj()
     pol_approximators = [
         spec.get_vf_func_approx_obj() for spec in pol_fa_spec
     ]
     self.pol_fa: Sequence[FuncApproxBase] = pol_approximators

예제 #6

파일 보기

파일: rl_func_approx_base.py 프로젝트: zhongyunuestc/MDP-DP-RL

    def __init__(self, mdp_rep_for_rl: MDPRepForRLFA, exploring_start: bool,
                 softmax: bool, epsilon: float, epsilon_half_life: float,
                 num_episodes: int, max_steps: int,
                 fa_spec: FuncApproxSpec) -> None:
        """Record the settings and build both function approximators.

        Args:
            mdp_rep_for_rl: Stored as ``self.mdp_rep``; its
                ``state_action_func`` attribute is also exposed as
                ``self.state_action_func``.
            exploring_start: Stored unchanged.
            softmax: Stored unchanged.
            epsilon: Passed, with ``epsilon_half_life``, to
                ``get_epsilon_decay_func``; the returned callable is
                stored as ``self.epsilon_func``.
            epsilon_half_life: See ``epsilon``.
            num_episodes: Stored unchanged.
            max_steps: Stored unchanged.
            fa_spec: Spec used twice: ``get_vf_func_approx_obj()``
                yields ``self.vf_fa`` and ``get_qvf_func_approx_obj()``
                yields ``self.qvf_fa``.
        """
        self.mdp_rep: MDPRepForRLFA = mdp_rep_for_rl
        self.exploring_start: bool = exploring_start
        self.softmax: bool = softmax

        # epsilon / epsilon_half_life are not kept themselves; only the
        # callable built from them is.
        decay_func = get_epsilon_decay_func(epsilon, epsilon_half_life)
        self.epsilon_func: Callable[[int], float] = decay_func

        self.num_episodes: int = num_episodes
        self.max_steps: int = max_steps

        # Two approximators come from the same spec, created in this order.
        value_fa = fa_spec.get_vf_func_approx_obj()
        self.vf_fa: FuncApproxBase = value_fa
        q_value_fa = fa_spec.get_qvf_func_approx_obj()
        self.qvf_fa: FuncApproxBase = q_value_fa

        # Shortcut to the MDP representation's state_action_func.
        self.state_action_func = self.mdp_rep.state_action_func