Example #1
0
            raise ValueError
        return ret

    def pf_as_policy_type(i: int) -> Callable[[int], Sequence[str]]:
        return get_sampling_func_from_prob_dict(policy_func(i))

    this_qf = adp_pg_obj.get_act_value_func(pf_as_policy_type)
    this_vf = adp_pg_obj.get_value_func(pf_as_policy_type)
    print("Printing vf for a policy")
    print(this_vf(1))
    print(this_vf(2))
    print(this_vf(3))
    print("Printing DP vf for a policy")
    from processes.policy import Policy
    true_vf_for_pol = mdp_ref_obj1.get_value_func_dict(
        Policy({s: policy_func(s)
                for s in {1, 2, 3}}))
    print(true_vf_for_pol)

    opt_det_polf = adp_pg_obj.get_optimal_det_policy_func()

    # noinspection PyShadowingNames
    def opt_polf(s: S, opt_det_polf=opt_det_polf) -> Mapping[A, float]:
        return {opt_det_polf(s): 1.0}

    print("Printing Opt Policy")
    print(opt_polf(1))
    print(opt_polf(2))
    print(opt_polf(3))

    opt_vf = adp_pg_obj.get_value_func(adp_pg_obj.get_policy_as_policy_type())
Example #2
0
    #     {s: policy_func(s) for s in {1, 2, 3}}
    # ))
    # print(true_vf_for_pol)
    #
    # # this_qf = adp_pg_obj.get_act_value_func_fa(policy_func)
    # this_vf = adp_pg_obj.get_value_func_fa(policy_func)
    # print("Printing vf for a policy")
    # print(this_vf(1))
    # print(this_vf(2))
    # print(this_vf(3))

    tol_val = 1e-6
    true_opt = mdp_ref_obj1.get_optimal_policy(tol=tol_val)
    print("Printing DP Opt Policy")
    print(true_opt)
    true_vf = mdp_ref_obj1.get_value_func_dict(true_opt)
    print("Printing DP Opt VF")
    print(true_vf)

    opt_det_polf = adp_pg_obj.get_optimal_det_policy_func()

    # noinspection PyShadowingNames
    def opt_polf(s: S, opt_det_polf=opt_det_polf) -> Mapping[A, float]:
        return {opt_det_polf(s): 1.0}

    print("Printing Opt Policy")
    print(opt_polf(1))
    print(opt_polf(2))
    print(opt_polf(3))

    opt_vf = adp_pg_obj.get_value_func(adp_pg_obj.get_policy_as_policy_type())