def get_act_value_func(self, pol_func: PolicyType) -> QFType:
     """Return the result of ``self.get_qv_func_fa`` for ``pol_func``.

     ``self.get_qv_func_fa`` receives a one-state helper.  For a state
     ``s`` the helper calls ``pol_func(s)``, invokes the returned callable
     with ``len(self.state_action_func(s)) *
     RLFuncApproxBase.NUM_SAMPLES_PER_ACTION`` and passes that result to
     ``get_pdf_from_samples``.

     :param pol_func: callable taking a state and returning a callable
         that accepts a count (presumably an action sampler -- confirm
         against the definition of ``PolicyType``).
     :return: whatever ``self.get_qv_func_fa`` returns for the helper.
     """

     # ``pol_func`` is bound as a default argument, so the helper keeps
     # the reference that was current when it was defined.
     def pdf_for_state(s, pol_func=pol_func):
         draw = pol_func(s)
         num_draws = (len(self.state_action_func(s)) *
                      RLFuncApproxBase.NUM_SAMPLES_PER_ACTION)
         return get_pdf_from_samples(draw(num_draws))

     return self.get_qv_func_fa(pdf_for_state)
    def get_value_func(self, polf: PolicyType) -> Callable[[S], float]:
        """Return a function that looks up the value of a state under ``polf``.

        A ``Policy`` is built from one entry per item of
        ``self.get_state_action_dict()``: for key ``st`` with value
        ``actions``, ``polf(st)`` is invoked with
        ``len(actions) * TabularBase.NUM_SAMPLES_PER_ACTION`` and the result
        is passed to ``get_pdf_from_samples``.

        :param polf: callable taking a state and returning a callable that
            accepts a count (presumably an action sampler -- confirm against
            the definition of ``PolicyType``).
        :return: a function ``vf(state, pol=<built policy>)`` that indexes
            ``self.get_value_func_dict(pol)`` with ``state``.
        """
        pdf_by_state = {}
        for st, actions in self.get_state_action_dict().items():
            draw = polf(st)
            num_draws = len(actions) * TabularBase.NUM_SAMPLES_PER_ACTION
            pdf_by_state[st] = get_pdf_from_samples(draw(num_draws))
        pol = Policy(pdf_by_state)

        # noinspection PyShadowingNames
        def vf(state: S, pol=pol) -> float:
            # The value dict is requested from ``self`` on every call.
            value_by_state = self.get_value_func_dict(pol)
            return value_by_state[state]

        return vf
Beispiel #3
0
 def get_value_func(self, pol_func: PolicyType) -> VFType:
     """Return the result of ``self.get_value_func_fa`` for ``pol_func``.

     ``self.get_value_func_fa`` receives a one-state helper.  For a state
     ``s`` the helper calls ``pol_func(s)``, invokes the returned callable
     with ``len(self.state_action_func(s)) * ADP.NUM_SAMPLES_PER_ACTION``
     and passes that result to ``get_pdf_from_samples``.

     :param pol_func: callable taking a state and returning a callable
         that accepts a count (presumably an action sampler -- confirm
         against the definition of ``PolicyType``).
     :return: whatever ``self.get_value_func_fa`` returns for the helper.
     """

     def pdf_for_state(s):
         draw = pol_func(s)
         num_draws = (len(self.state_action_func(s)) *
                      ADP.NUM_SAMPLES_PER_ACTION)
         return get_pdf_from_samples(draw(num_draws))

     return self.get_value_func_fa(pdf_for_state)