Example #1
    def test_update(self):
        """A full update reproduces dynamic_1; a partial update changes only
        the given key and leaves the rest at their old values."""
        full = self.dynamic_0.update([(0, 1.0), (1, 2.0), (2, 3.0)])
        self.assertEqual(self.dynamic_1, full)

        partial = self.dynamic_0.update([(1, 3.0)])
        self.assertEqual(partial, Dynamic(values_map={0: 0.0, 1: 3.0, 2: 0.0}))
    def test_evaluate_mrp(self):
        """Sampled MRP evaluation converges near the known value (~170) and
        agrees with the exact finite-MRP evaluation within 1%."""
        start = Dynamic({s: 0.0 for s in self.finite_flip_flop.states()})

        # Shared convergence predicate for both iterations.
        def close_enough(a, b):
            return a.within(b, 1e-4)

        v = iterate.converged(
            evaluate_mrp(
                self.finite_flip_flop,
                γ=0.99,
                approx_0=start,
                non_terminal_states_distribution=Choose(
                    set(self.finite_flip_flop.states())),
                num_state_samples=5,
            ),
            done=close_enough,
        )

        self.assertEqual(len(v.values_map), 2)
        for state in v.values_map:
            self.assertLess(abs(v(state) - 170), 1.0)

        v_finite = iterate.converged(
            evaluate_finite_mrp(self.finite_flip_flop, γ=0.99, approx_0=start),
            done=close_enough,
        )

        assert_allclose(v.evaluate([True, False]),
                        v_finite.evaluate([True, False]),
                        rtol=0.01)
Example #3
    def test_value_iteration(self):
        """Exact DP value iteration, the finite-FA variant, and the sampled
        variant should all agree on the optimal value function."""
        exact_map: Mapping[NonTerminal[InventoryState],
                           float] = value_iteration_result(
                               self.si_mdp, self.gamma)[0]
        exact_arr: np.ndarray = np.array([exact_map[s] for s in self.states])

        def close(a, b):
            return a.within(b, 1e-5)

        fa = Dynamic({s: 0.0 for s in self.states})
        finite_fa = iterate.converged(
            value_iteration_finite(self.si_mdp, self.gamma, fa),
            done=close)
        finite_arr: np.ndarray = finite_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_arr - finite_arr)), 0.01)

        sampled_fa = iterate.converged(
            value_iteration(self.si_mdp,
                            self.gamma,
                            fa,
                            Choose(self.states),
                            num_state_samples=30),
            done=close)
        sampled_arr: np.ndarray = sampled_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_arr - sampled_arr)), 0.01)
Example #4
    def test_value_iteration(self):
        """Backward induction on the finite-horizon MDP should match both
        approximate backward-optimization variants at every time step."""
        vpstar = optimal_vf_and_policy(self.mdp_seq, 1.)
        # NOTE: call order matters — states() may be consumed once.
        states = self.single_step_mdp.states()
        fa_dynamic = Dynamic({s: 0.0 for s in states})
        fa_tabular = Tabular()
        distribution = Choose(set(states))
        approx_vpstar_finite = back_opt_vf_and_policy_finite(
            [(self.mdp_seq[i], fa_dynamic) for i in range(self.steps)],
            1.
        )
        approx_vpstar = back_opt_vf_and_policy(
            [(self.single_step_mdp, fa_tabular, distribution)
             for _ in range(self.steps)],
            1.,
            num_state_samples=120,
            error_tolerance=0.01
        )

        triples = zip(vpstar, approx_vpstar_finite, approx_vpstar)
        for t, ((v1, _), (v2, _), (v3, _)) in enumerate(triples):
            step_states = self.mdp_seq[t].keys()
            exact = np.array([v1[s] for s in step_states])
            finite_arr = v2.evaluate(step_states)
            sampled_arr = v3.evaluate(step_states)
            self.assertLess(max(abs(exact - finite_arr)), 0.001)
            self.assertLess(max(abs(exact - sampled_arr)), 1.0)
Example #5
    def test_evaluate_mrp(self):
        """Backward evaluation of the finite-horizon MRP should match both
        approximate backward-evaluation variants at every time step."""
        vf = evaluate(self.mrp_seq, 1.)
        # NOTE: call order matters — states() may be consumed once.
        states = self.single_step_mrp.states()
        fa_dynamic = Dynamic({s: 0.0 for s in states})
        fa_tabular = Tabular()
        distribution = Choose(set(states))
        approx_vf_finite = backward_evaluate_finite(
            [(self.mrp_seq[i], fa_dynamic) for i in range(self.steps)],
            1.
        )
        approx_vf = backward_evaluate(
            [(self.single_step_mrp, fa_tabular, distribution)
             for _ in range(self.steps)],
            1.,
            num_state_samples=120,
            error_tolerance=0.01
        )

        triples = zip(vf, approx_vf_finite, approx_vf)
        for t, (v1, v2, v3) in enumerate(triples):
            step_states = self.mrp_seq[t].keys()
            exact = np.array([v1[s] for s in step_states])
            finite_arr = v2.evaluate(step_states)
            sampled_arr = v3.evaluate(step_states)
            self.assertLess(max(abs(exact - finite_arr)), 0.001)
            self.assertLess(max(abs(exact - sampled_arr)), 1.0)
Example #6
    def test_evaluate_mrp(self):
        """The implied MRP's exact value function should be matched closely by
        finite-FA evaluation and roughly by the sampled evaluation."""
        exact_vf: np.ndarray = self.implied_mrp.get_value_function_vec(
            self.gamma)

        fa = Dynamic({s: 0.0 for s in self.states})
        finite_fa = iterate.converged(
            evaluate_finite_mrp(self.implied_mrp, self.gamma, fa),
            done=lambda a, b: a.within(b, 1e-4),
        )
        finite_arr: np.ndarray = finite_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_vf - finite_arr)), 0.001)

        sampled_fa = iterate.converged(
            evaluate_mrp(
                self.implied_mrp,
                self.gamma,
                fa,
                Choose(self.states),
                num_state_samples=30,
            ),
            # Sampling noise: a looser convergence threshold is used here.
            done=lambda a, b: a.within(b, 0.1),
        )
        sampled_arr: np.ndarray = sampled_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_vf - sampled_arr)), 1.0)
    def test_evaluate_finite_mrp(self):
        """Iterating finite-MRP evaluation to a fixed point recovers both
        state values, each near 170."""
        zero = Dynamic({s: 0.0 for s in self.finite_flip_flop.states()})
        fixed_point = FunctionApprox.converged(
            evaluate_finite_mrp(self.finite_flip_flop, γ=0.99, approx_0=zero))

        self.assertEqual(len(fixed_point.values_map), 2)
        for state in fixed_point.values_map:
            self.assertLess(abs(fixed_point(state) - 170), 0.1)
    def test_evaluate_finite_mrp(self):
        """Same fixed-point check as above, but driven through the generic
        iterate.converged helper with an explicit done predicate."""
        zero = Dynamic({s: 0.0 for s in self.finite_flip_flop.states()})
        fixed_point = iterate.converged(
            evaluate_finite_mrp(self.finite_flip_flop, γ=0.99, approx_0=zero),
            done=lambda a, b: a.within(b, 1e-4),
        )

        self.assertEqual(len(fixed_point.values_map), 2)
        for state in fixed_point.values_map:
            self.assertLess(abs(fixed_point(state) - 170), 0.1)
    def test_compare_to_backward_induction(self):
        """Evaluating the 10-step finite-horizon MRP as one big MRP should
        agree with explicit backward induction at every time step."""
        finite_horizon = finite_horizon_MRP(self.finite_flip_flop, 10)

        zero = Dynamic({s: 0.0 for s in finite_horizon.states()})
        v = FunctionApprox.converged(
            evaluate_finite_mrp(finite_horizon, γ=1, approx_0=zero))
        # 2 flip-flop states x 11 time indices (0..10) = 22 entries.
        self.assertEqual(len(v.values_map), 22)

        finite_v = list(
            evaluate(unwrap_finite_horizon_MRP(finite_horizon), gamma=1))

        for time in range(10):
            for flag in (True, False):
                self.assertAlmostEqual(v(WithTime(state=flag, time=time)),
                                       finite_v[time][flag])
Example #10
class TestDynamic(unittest.TestCase):
    """Unit tests for the Dynamic (fixed-key, tabular) function approximation."""

    def setUp(self):
        # Exact fixtures over keys {0, 1, 2} and slightly perturbed copies.
        self.dynamic_0 = Dynamic(values_map={k: 0.0 for k in range(3)})
        self.dynamic_almost_0 = Dynamic(values_map={k: 0.01 for k in range(3)})

        self.dynamic_1 = Dynamic(values_map={k: float(k + 1) for k in range(3)})
        self.dynamic_almost_1 = Dynamic(values_map={0: 1.01, 1: 2.01, 2: 3.01})

    def test_update(self):
        # A full update replaces every tracked value...
        full = self.dynamic_0.update([(0, 1.0), (1, 2.0), (2, 3.0)])
        self.assertEqual(self.dynamic_1, full)

        # ...while a partial update leaves untouched keys at their old values.
        partial = self.dynamic_0.update([(1, 3.0)])
        self.assertEqual(partial, Dynamic(values_map={0: 0.0, 1: 3.0, 2: 0.0}))

    def test_evaluate(self):
        """Vectorized evaluation returns the stored values in key order."""
        np.testing.assert_array_almost_equal(
            self.dynamic_0.evaluate([0, 1, 2]), np.array([0.0, 0.0, 0.0]))

        np.testing.assert_array_almost_equal(
            self.dynamic_1.evaluate([0, 1, 2]), np.array([1.0, 2.0, 3.0]))

    def test_call(self):
        """Calling the approximation on a single key returns its stored value."""
        for key in range(3):
            self.assertEqual(self.dynamic_0(key), 0.0)
            self.assertEqual(self.dynamic_1(key), float(key + 1))

    def test_within(self):
        """within() is reflexive at zero tolerance and absorbs a 0.01 shift
        under a 0.011 tolerance."""
        for exact, noisy in ((self.dynamic_0, self.dynamic_almost_0),
                             (self.dynamic_1, self.dynamic_almost_1)):
            self.assertTrue(exact.within(exact, tolerance=0.0))
            self.assertTrue(exact.within(noisy, tolerance=0.011))
    def test_value_iteration(self):
        """Exact value iteration vs. its finite-FA and sampled-FA variants."""
        exact_map: Mapping[InventoryState, float] = value_iteration_result(
            self.si_mdp, self.gamma)[0]
        exact_arr: np.ndarray = np.array([exact_map[s] for s in self.states])

        fa = Dynamic({s: 0.0 for s in self.states})
        finite_fa = FunctionApprox.converged(
            value_iteration_finite(self.si_mdp, self.gamma, fa))
        finite_arr: np.ndarray = finite_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_arr - finite_arr)), 0.001)

        sampled_fa = FunctionApprox.converged(
            value_iteration(self.si_mdp,
                            self.gamma,
                            fa,
                            Choose(self.states),
                            num_state_samples=30),
            # Looser tolerance: the sampled iteration is noisy.
            0.1)
        sampled_arr: np.ndarray = sampled_fa.evaluate(self.states)
        self.assertLess(max(abs(exact_arr - sampled_arr)), 1.0)
Example #12
    def setUp(self):
        # Exact fixtures over keys {0, 1, 2} and slightly perturbed copies.
        self.dynamic_0 = Dynamic(values_map={k: 0.0 for k in range(3)})
        self.dynamic_almost_0 = Dynamic(values_map={k: 0.01 for k in range(3)})

        self.dynamic_1 = Dynamic(values_map={k: float(k + 1) for k in range(3)})
        self.dynamic_almost_1 = Dynamic(values_map={0: 1.01, 1: 2.01, 2: 3.01})