def test_evaluate_mrp(self):
        """Check sampled MRP evaluation on the flip-flop process.

        Runs ``evaluate_mrp`` (5 state samples per update, γ=0.99) from an
        all-zero ``Dynamic`` approximation until successive approximations
        are within 1e-4 of each other, then asserts that:

        * the result holds exactly two state values,
        * each value is within 1.0 of 170,
        * the values for ``[True, False]`` match those produced by
          ``evaluate_finite_mrp`` to a relative tolerance of 1%.
        """
        flip_flop = self.finite_flip_flop
        initial = Dynamic({state: 0.0 for state in flip_flop.states()})

        def close_enough(previous, current):
            # Same stopping rule for both evaluations below.
            return previous.within(current, 1e-4)

        state_distribution = Choose(set(flip_flop.states()))
        sampled_updates = evaluate_mrp(
            flip_flop,
            γ=0.99,
            approx_0=initial,
            non_terminal_states_distribution=state_distribution,
            num_state_samples=5,
        )
        v = iterate.converged(sampled_updates, done=close_enough)

        self.assertEqual(len(v.values_map), 2)

        for state in v.values_map:
            deviation = abs(v(state) - 170)
            self.assertLess(deviation, 1.0)

        finite_updates = evaluate_finite_mrp(
            flip_flop, γ=0.99, approx_0=initial)
        v_finite = iterate.converged(finite_updates, done=close_enough)

        # Compare the two approximations on both flip-flop states.
        query_states = [True, False]
        assert_allclose(
            v.evaluate(query_states),
            v_finite.evaluate(query_states),
            rtol=0.01,
        )
Esempio n. 2
0
    def test_evaluate_mrp(self):
        """Compare iterative MRP evaluations with the reference vector.

        The reference is ``self.implied_mrp.get_value_function_vec``
        evaluated at ``self.gamma``.  Two approximations are built from
        the same all-zero ``Dynamic`` starting point:

        * ``evaluate_finite_mrp`` iterated until successive approximations
          are within 1e-4; it must agree with the reference to 0.001.
        * ``evaluate_mrp`` with 30 state samples drawn via
          ``Choose(self.states)``, iterated until within 0.1; it must
          agree with the reference to 1.0.
        """
        reference_vf: np.ndarray = self.implied_mrp.get_value_function_vec(
            self.gamma
        )

        initial_approx = Dynamic({state: 0.0 for state in self.states})

        # Finite-MRP evaluation with the tight stopping tolerance.
        finite_updates = evaluate_finite_mrp(
            self.implied_mrp, self.gamma, initial_approx)
        finite_approx = iterate.converged(
            finite_updates,
            done=lambda previous, current: previous.within(current, 1e-4),
        )
        finite_vf: np.ndarray = finite_approx.evaluate(self.states)

        finite_error = max(abs(reference_vf - finite_vf))
        self.assertLess(finite_error, 0.001)

        # Sample-based evaluation: looser stopping rule and looser bound.
        sampled_updates = evaluate_mrp(
            self.implied_mrp,
            self.gamma,
            initial_approx,
            Choose(self.states),
            num_state_samples=30,
        )
        sampled_approx = iterate.converged(
            sampled_updates,
            done=lambda previous, current: previous.within(current, 0.1),
        )
        sampled_vf: np.ndarray = sampled_approx.evaluate(self.states)

        sampled_error = max(abs(reference_vf - sampled_vf))
        self.assertLess(sampled_error, 1.0)
Esempio n. 3
0
def evaluate_mrp_result(
    mrp: FiniteMarkovRewardProcess[S],
    gamma: float,
    approx_0: FunctionApprox[S],
) -> FunctionApprox[S]:
    """Iterate ``evaluate_finite_mrp`` to convergence and return the result.

    Args:
        mrp: The finite Markov reward process to evaluate.
        gamma: Discount factor forwarded to ``evaluate_finite_mrp``.
        approx_0: Initial value-function approximation.

    Returns:
        The approximation at which ``converged`` stops, using
        ``almost_equal_vf_approx`` as the stopping test.
    """
    # The converged value is a function approximation, matching the declared
    # return type; it is not a NumPy array.
    v_star: FunctionApprox[S] = converged(
        evaluate_finite_mrp(mrp, gamma, approx_0),
        done=almost_equal_vf_approx,
    )
    return v_star
    def test_evaluate_finite_mrp(self):
        """Check finite-MRP evaluation of the flip-flop process.

        Starts from an all-zero ``Dynamic`` approximation, runs
        ``evaluate_finite_mrp`` with γ=0.99 through
        ``FunctionApprox.converged``, and asserts that the result holds
        two state values, each within 0.1 of 170.
        """
        flip_flop = self.finite_flip_flop
        zero_approx = Dynamic({state: 0.0 for state in flip_flop.states()})

        updates = evaluate_finite_mrp(flip_flop, γ=0.99, approx_0=zero_approx)
        v = FunctionApprox.converged(updates)

        self.assertEqual(len(v.values_map), 2)

        for state in v.values_map:
            deviation = abs(v(state) - 170)
            self.assertLess(deviation, 0.1)
    def test_evaluate_finite_mrp(self):
        """Check finite-MRP evaluation of the flip-flop process.

        Starts from an all-zero ``Dynamic`` approximation and iterates
        ``evaluate_finite_mrp`` (γ=0.99) until successive approximations
        are within 1e-4 of each other.  The result must hold two state
        values, each within 0.1 of 170.
        """
        flip_flop = self.finite_flip_flop
        zero_approx = Dynamic({state: 0.0 for state in flip_flop.states()})

        def close_enough(previous, current):
            return previous.within(current, 1e-4)

        updates = evaluate_finite_mrp(flip_flop, γ=0.99, approx_0=zero_approx)
        v = iterate.converged(updates, done=close_enough)

        self.assertEqual(len(v.values_map), 2)

        for state in v.values_map:
            deviation = abs(v(state) - 170)
            self.assertLess(deviation, 0.1)
    def test_compare_to_backward_induction(self):
        """Compare iterative evaluation with per-time-step evaluation.

        Wraps the flip-flop process in a 10-step finite-horizon MRP,
        evaluates it with ``evaluate_finite_mrp`` (γ=1) through
        ``FunctionApprox.converged``, and checks the result against the
        values returned by ``evaluate`` on the unwrapped process for every
        time step 0..9 and both states.
        """
        horizon_mrp = finite_horizon_MRP(self.finite_flip_flop, 10)

        zero_approx = Dynamic({state: 0.0 for state in horizon_mrp.states()})
        updates = evaluate_finite_mrp(horizon_mrp, γ=1, approx_0=zero_approx)
        v = FunctionApprox.converged(updates)
        # NOTE(review): 22 is presumably 2 states x 11 time indices - confirm.
        self.assertEqual(len(v.values_map), 22)

        unwrapped = unwrap_finite_horizon_MRP(horizon_mrp)
        per_step_values = list(evaluate(unwrapped, gamma=1))

        for step in range(10):
            for flag in (True, False):
                self.assertAlmostEqual(
                    v(WithTime(state=flag, time=step)),
                    per_step_values[step][flag],
                )