Example No. 1
    def test_finite_horizon_MRP(self):
        finite = finite_horizon_MRP(self.finite_flip_flop, 10)

        trues = [NonTerminal(WithTime(True, time)) for time in range(10)]
        falses = [NonTerminal(WithTime(False, time)) for time in range(10)]
        non_terminal_states = set(trues + falses)
        self.assertEqual(set(finite.non_terminal_states), non_terminal_states)

        expected_transition = {}
        for state in non_terminal_states:
            t: int = state.state.time
            st: bool = state.state.state
            if t < 9:
                prob = {
                    (NonTerminal(WithTime(st, t + 1)), 1.0): 0.3,
                    (NonTerminal(WithTime(not st, t + 1)), 2.0): 0.7
                }
            else:
                prob = {
                    (Terminal(WithTime(st, t + 1)), 1.0): 0.3,
                    (Terminal(WithTime(not st, t + 1)), 2.0): 0.7
                }

            expected_transition[state] = Categorical(prob)

        for state in non_terminal_states:
            distribution.assert_almost_equal(
                self,
                finite.transition_reward(state),
                expected_transition[state])
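
The test above relies on a self.finite_flip_flop fixture whose setUp is not shown. The expected transition rewards (stay in the same boolean state with probability 0.3 and reward 1.0, flip with probability 0.7 and reward 2.0) suggest a base process along the lines of the sketch below; this is a reconstruction under those assumptions, not the actual fixture code, and the import paths are assumed to match the rl package the test exercises.

    from rl.distribution import Categorical
    from rl.markov_process import FiniteMarkovRewardProcess

    # Reconstructed flip-flop MRP: from either boolean state, flip with
    # probability 0.7 (reward 2.0) or stay with probability 0.3 (reward 1.0).
    finite_flip_flop = FiniteMarkovRewardProcess({
        b: Categorical({
            (not b, 2.0): 0.7,
            (b, 1.0): 0.3
        })
        for b in (True, False)
    })

finite_horizon_MRP(finite_flip_flop, 10) then lifts this process onto WithTime states, which is why every expected state in the test carries a time index from 0 to 9.
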
Example No. 2
    def test_unwrap_finite_horizon_MDP(self):
        finite = finite_horizon_MDP(self.finite_flip_flop, 10)
        unwrapped = unwrap_finite_horizon_MDP(finite)

        self.assertEqual(len(unwrapped), 10)

        def action_mapping_for(s: WithTime[bool]) -> \
                ActionMapping[bool, WithTime[bool]]:
            same = NonTerminal(s.step_time())
            different = NonTerminal(dataclasses.replace(
                s.step_time(),
                state=not s.state
            ))

            return {
                True: Categorical({
                    (same, 1.0): 0.7,
                    (different, 2.0): 0.3
                }),
                False: Categorical({
                    (same, 1.0): 0.3,
                    (different, 2.0): 0.7
                })
            }

        for t in range(9):
            for s in True, False:
                s_time = WithTime(state=s, time=t)
                for a in True, False:
                    distribution.assert_almost_equal(
                        self,
                        finite.mapping[NonTerminal(s_time)][a],
                        action_mapping_for(s_time)[a]
                    )

        for s in True, False:
            s_time = WithTime(state=s, time=9)
            same = Terminal(s_time.step_time())
            different = Terminal(dataclasses.replace(
                s_time.step_time(),
                state=not s_time.state
            ))
            act_map = {
                True: Categorical({
                    (same, 1.0): 0.7,
                    (different, 2.0): 0.3
                }),
                False: Categorical({
                    (same, 1.0): 0.3,
                    (different, 2.0): 0.7
                })
            }
            for a in True, False:
                distribution.assert_almost_equal(
                    self,
                    finite.mapping[NonTerminal(s_time)][a],
                    act_map[a]
                )
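
As in Example No. 1, the fixture itself is not shown. The expected action mapping (action True: stay with probability 0.7, flip with probability 0.3; action False: stay with probability 0.3, flip with probability 0.7; staying always rewards 1.0 and flipping 2.0) points to a base MDP roughly like the following sketch, which is an assumed reconstruction rather than code from the test module.

    from rl.distribution import Categorical
    from rl.markov_decision_process import FiniteMarkovDecisionProcess

    # Reconstructed flip-flop MDP: action True favours staying put,
    # action False favours flipping; staying pays 1.0, flipping pays 2.0.
    finite_flip_flop = FiniteMarkovDecisionProcess({
        b: {
            True: Categorical({(b, 1.0): 0.7, (not b, 2.0): 0.3}),
            False: Categorical({(b, 1.0): 0.3, (not b, 2.0): 0.7})
        }
        for b in (True, False)
    })
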
Example No. 3
    def test_unwrap_finite_horizon_MRP(self):
        finite = finite_horizon_MRP(self.finite_flip_flop, 10)

        def transition_for(_):
            return {
                True: Categorical({
                    (NonTerminal(True), 1.0): 0.3,
                    (NonTerminal(False), 2.0): 0.7
                }),
                False: Categorical({
                    (NonTerminal(True), 2.0): 0.7,
                    (NonTerminal(False), 1.0): 0.3
                })
            }

        unwrapped = unwrap_finite_horizon_MRP(finite)
        self.assertEqual(len(unwrapped), 10)

        expected_transitions = [transition_for(n) for n in range(10)]
        for time in range(9):
            got = unwrapped[time]
            expected = expected_transitions[time]
            distribution.assert_almost_equal(
                self, got[NonTerminal(True)],
                expected[True]
            )
            distribution.assert_almost_equal(
                self, got[NonTerminal(False)],
                expected[False]
            )

        distribution.assert_almost_equal(
            self, unwrapped[9][NonTerminal(True)],
            Categorical({
                (Terminal(True), 1.0): 0.3,
                (Terminal(False), 2.0): 0.7
            })
        )
        distribution.assert_almost_equal(
            self, unwrapped[9][NonTerminal(False)],
            Categorical({
                (Terminal(True), 2.0): 0.7,
                (Terminal(False), 1.0): 0.3
            })
        )
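
The point of unwrapping is that each element of unwrapped is a plain one-step reward transition, so the whole sequence can be solved by backward induction. A minimal follow-up sketch, assuming the evaluate helper exported by rl.finite_horizon, with signature evaluate(steps, gamma), yields one value function per time step, earliest first:

    from rl.finite_horizon import (evaluate, finite_horizon_MRP,
                                   unwrap_finite_horizon_MRP)

    # Backward induction over the 10 unwrapped steps; gamma = 1 because the
    # horizon already truncates the process.  finite_flip_flop is the base
    # MRP sketched under Example No. 1.
    finite = finite_horizon_MRP(finite_flip_flop, 10)
    value_functions = list(evaluate(unwrap_finite_horizon_MRP(finite), 1.0))
    v0 = value_functions[0]   # value function at time 0, keyed by
                              # NonTerminal(True) / NonTerminal(False)
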
Example No. 4
 def sr_sampler_func(wealth=wealth,
                     alloc=alloc) -> Tuple[State[float], float]:
     next_wealth: float = alloc * (1 + distr.sample()) \
         + (wealth.state - alloc) * (1 + rate)
     reward: float = utility_f(next_wealth) \
         if t == steps - 1 else 0.
     next_state: State[float] = Terminal(next_wealth) \
         if t == steps - 1 else NonTerminal(next_wealth)
     return (next_state, reward)
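
This sampler is a closure: distr, rate, t, steps and utility_f are captured from the enclosing asset-allocation code and are not shown here. In the surrounding step method the function is typically wrapped in a SampledDistribution so that each sample() call draws one (next_state, reward) pair. A self-contained sketch of that assumed context, with purely illustrative stand-in values for the free names:

    from typing import Tuple
    import numpy as np
    from rl.distribution import SampledDistribution
    from rl.markov_process import NonTerminal, State, Terminal

    # Illustrative stand-ins for the names the closure captures
    # (none of these values come from the original source).
    steps, t = 4, 3                      # final step of a 4-step horizon
    rate = 0.02                          # riskless one-period rate
    distr = SampledDistribution(lambda: np.random.normal(0.08, 0.2))
    utility_f = lambda w: 1. - np.exp(-w)
    wealth, alloc = NonTerminal(1.0), 0.6

    def sr_sampler_func(wealth=wealth,
                        alloc=alloc) -> Tuple[State[float], float]:
        next_wealth = alloc * (1 + distr.sample()) \
            + (wealth.state - alloc) * (1 + rate)
        reward = utility_f(next_wealth) if t == steps - 1 else 0.
        next_state = Terminal(next_wealth) if t == steps - 1 \
            else NonTerminal(next_wealth)
        return next_state, reward

    # The enclosing step() wraps the sampler so each sample() call draws
    # a single (next_state, reward) pair.
    sr_dist = SampledDistribution(sampler=sr_sampler_func,
                                  expectation_samples=1000)
    next_state, reward = sr_dist.sample()
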
Example No. 5
 def sr_sampler_func(price=price,
                     exer=exer) -> Tuple[State[float], float]:
     if exer:
         return Terminal(0.), exer_payoff(price.state)
     else:
         next_price: float = np.exp(
             np.random.normal(
                 np.log(price.state) + (r - s * s / 2) * dt,
                 s * np.sqrt(dt)))
         return NonTerminal(next_price), 0.
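
Here exer is the exercise decision and exer_payoff, r, s and dt come from the enclosing American-option pricing code: exercising ends the episode with the payoff as the reward, while the else branch advances the price one step under risk-neutral geometric Brownian motion (log-price drift (r - s*s/2) * dt, standard deviation s * sqrt(dt)). A sketch of the kind of context being assumed, with a put payoff as a purely illustrative choice:

    from rl.markov_process import NonTerminal

    # Illustrative stand-ins for the captured names (not from the source).
    strike = 100.0
    exer_payoff = lambda p: max(strike - p, 0.)   # American put payoff
    r, s = 0.03, 0.25        # risk-free rate and volatility
    dt = 1.0 / 300           # time increment per simulation step
    price, exer = NonTerminal(100.0), False   # current price, exercise decision
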
Example No. 6
 def sr_sampler_func(
         state=state,
         action=action) -> Tuple[State[AssetAllocState], float]:
     time, wealth = state.state
     next_wealth: float = action * (1 + distrs[time].sample()) \
         + (wealth - action) * (1 + rates[time])
     reward: float = utility_f(next_wealth) \
         if time == steps - 1 else 0.
     next_pair: AssetAllocState = (time + 1, next_wealth)
     next_state: State[AssetAllocState] = \
         Terminal(next_pair) if time == steps - 1 \
         else NonTerminal(next_pair)
     return (next_state, reward)
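
This variant folds the time index into the state so that a single sampler covers every step: state.state unpacks into a (time, wealth) pair, distrs and rates are per-step risky-return distributions and riskless rates, and the episode terminates once time reaches steps - 1. The state type is presumably just an alias such as the one sketched here (an assumption, not quoted from the source):

    from typing import Tuple

    # Assumed alias for the (time, wealth) pair used as the MDP state.
    AssetAllocState = Tuple[int, float]
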
Example No. 7
 def get_opt_vf_and_policy(self) -> \
         Iterator[Tuple[V[int], FiniteDeterministicPolicy[int, bool]]]:
     dt: float = self.dt()
     up_factor: float = np.exp(self.vol * np.sqrt(dt))
     # Risk-neutral up-move probability of the CRR binomial tree,
     # i.e. (exp(r * dt) - d) / (u - d) with u = up_factor and d = 1 / u.
     up_prob: float = (np.exp(self.rate * dt) * up_factor - 1) / \
         (up_factor * up_factor - 1)
     # At time step i, node j: action True exercises (jump to a dummy
     # terminal state with the exercise payoff as reward); action False
     # continues (up-move to node j + 1 with probability up_prob, down-move
     # keeps index j), with zero continuation reward.
     return optimal_vf_and_policy(
         steps=[{
             NonTerminal(j): {
                 True: Constant((
                     Terminal(-1),
                     self.payoff(i * dt, self.state_price(i, j))
                 )),
                 False: Categorical({
                     (NonTerminal(j + 1), 0.): up_prob,
                     (NonTerminal(j), 0.): 1 - up_prob
                 })
             }
             for j in range(i + 1)
         } for i in range(self.num_steps + 1)],
         gamma=np.exp(-self.rate * dt)
     )
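
For context, optimal_vf_and_policy runs backward induction over the supplied per-step state/action mappings and yields one (value function, deterministic policy) pair per time step; here action True means exercising the option and action False means continuing. A hedged usage sketch follows, assuming the enclosing class (called OptimalExerciseBinTree below) takes constructor fields matching the attributes the method reads; the field names and all concrete numbers are illustrative assumptions, not taken from the source.

    from rl.markov_process import NonTerminal

    strike = 100.0
    tree = OptimalExerciseBinTree(
        spot_price=100.0,
        payoff=lambda _, x: max(strike - x, 0.),  # payoff(time, price): American put
        expiry=1.0,
        rate=0.05,
        vol=0.25,
        num_steps=300
    )
    vf_seq, policy_seq = zip(*tree.get_opt_vf_and_policy())
    print(vf_seq[0][NonTerminal(0)])   # option value at the root node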