Code Example #1
    def test_finite_horizon_MRP(self):
        finite = finite_horizon_MRP(self.finite_flip_flop, 10)

        trues = [NonTerminal(WithTime(True, time)) for time in range(10)]
        falses = [NonTerminal(WithTime(False, time)) for time in range(10)]
        non_terminal_states = set(trues + falses)
        self.assertEqual(set(finite.non_terminal_states), non_terminal_states)

        expected_transition = {}
        for state in non_terminal_states:
            t: int = state.state.time
            st: bool = state.state.state
            if t < 9:
                prob = {
                    (NonTerminal(WithTime(st, t + 1)), 1.0): 0.3,
                    (NonTerminal(WithTime(not st, t + 1)), 2.0): 0.7
                }
            else:
                prob = {
                    (Terminal(WithTime(st, t + 1)), 1.0): 0.3,
                    (Terminal(WithTime(not st, t + 1)), 2.0): 0.7
                }

            expected_transition[state] = Categorical(prob)

        for state in non_terminal_states:
            distribution.assert_almost_equal(
                self,
                finite.transition_reward(state),
                expected_transition[state])
Code Example #2
    def test_unwrap_finite_horizon_MDP(self):
        finite = finite_horizon_MDP(self.finite_flip_flop, 10)
        unwrapped = unwrap_finite_horizon_MDP(finite)

        self.assertEqual(len(unwrapped), 10)

        def action_mapping_for(s: WithTime[bool]) -> \
                ActionMapping[bool, WithTime[bool]]:
            same = NonTerminal(s.step_time())
            different = NonTerminal(dataclasses.replace(
                s.step_time(),
                state=not s.state
            ))

            return {
                True: Categorical({
                    (same, 1.0): 0.7,
                    (different, 2.0): 0.3
                }),
                False: Categorical({
                    (same, 1.0): 0.3,
                    (different, 2.0): 0.7
                })
            }

        for t in range(9):
            for s in True, False:
                s_time = WithTime(state=s, time=t)
                for a in True, False:
                    distribution.assert_almost_equal(
                        self,
                        finite.mapping[NonTerminal(s_time)][a],
                        action_mapping_for(s_time)[a]
                    )

        for s in True, False:
            s_time = WithTime(state=s, time=9)
            same = Terminal(s_time.step_time())
            different = Terminal(dataclasses.replace(
                s_time.step_time(),
                state=not s_time.state
            ))
            act_map = {
                True: Categorical({
                    (same, 1.0): 0.7,
                    (different, 2.0): 0.3
                }),
                False: Categorical({
                    (same, 1.0): 0.3,
                    (different, 2.0): 0.7
                })
            }
            for a in True, False:
                distribution.assert_almost_equal(
                    self,
                    finite.mapping[NonTerminal(s_time)][a],
                    act_map[a]
                )
Code Example #3
    def test_optimal_policy(self):
        finite = finite_horizon_MDP(self.finite_flip_flop, limit=10)
        steps = unwrap_finite_horizon_MDP(finite)
        *v_ps, (_, p) = optimal_vf_and_policy(steps, gamma=1)

        for _, a in p.action_for.items():
            self.assertEqual(a, False)

        self.assertAlmostEqual(v_ps[0][0][NonTerminal(True)], 17)
        self.assertAlmostEqual(v_ps[5][0][NonTerminal(False)], 17 / 2)
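
The two values asserted above can be reproduced without the rl library. Below is a minimal backward-induction sketch, assuming only the probabilities and rewards shown in the other examples (action True pays 1.0 w.p. 0.7 and 2.0 w.p. 0.3; action False pays 1.0 w.p. 0.3 and 2.0 w.p. 0.7). Because the continuation value is the same from either flip-flop state, the greedy action at every step is False and the optimal value depends only on the number of remaining steps, which is exactly what the test checks.

# Minimal backward-induction sketch for the 10-step flip-flop MDP used above.
# Expected one-step rewards per action (from the Categorical maps in Code
# Example #2): True -> 0.7 * 1.0 + 0.3 * 2.0 = 1.3,
#              False -> 0.3 * 1.0 + 0.7 * 2.0 = 1.7.
expected_reward = {True: 0.7 * 1.0 + 0.3 * 2.0, False: 0.3 * 1.0 + 0.7 * 2.0}

horizon = 10
values = [0.0] * (horizon + 1)          # values[t] = optimal value with t steps elapsed
for t in reversed(range(horizon)):      # backward induction with gamma = 1
    values[t] = max(expected_reward.values()) + values[t + 1]

assert abs(values[0] - 17.0) < 1e-9      # v_ps[0][0][NonTerminal(True)]
assert abs(values[5] - 17.0 / 2) < 1e-9  # v_ps[5][0][NonTerminal(False)]
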
Code Example #4
 def transition_for(_):
     return {
         True: Categorical({
             (NonTerminal(True), 1.0): 0.3,
             (NonTerminal(False), 2.0): 0.7
         }),
         False: Categorical({
             (NonTerminal(True), 2.0): 0.7,
             (NonTerminal(False), 1.0): 0.3
         })
     }
Code Example #5
    def test_finite_horizon_MDP(self):
        finite = finite_horizon_MDP(self.finite_flip_flop, limit=10)

        self.assertEqual(len(finite.non_terminal_states), 20)

        for s in finite.non_terminal_states:
            self.assertEqual(set(finite.actions(s)), {False, True})

        start = NonTerminal(WithTime(state=True, time=0))
        result = finite.mapping[start][False]
        expected_result = Categorical({
            (NonTerminal(WithTime(False, time=1)), 2.0): 0.7,
            (NonTerminal(WithTime(True, time=1)), 1.0): 0.3
        })
        distribution.assert_almost_equal(self, result, expected_result)
Code Example #6
 def states_sampler_func() -> NonTerminal[float]:
     start: float = np.random.lognormal(log_mean, log_stdev)
     price = np.exp(
         np.random.normal(
             np.log(start) +
             (self.rate - self.vol * self.vol / 2) * time,
             self.vol * np.sqrt(time)))
     return NonTerminal(price)
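
The sampler above draws a price at time `time` from risk-neutral geometric Brownian motion started at a lognormally distributed initial price, so its mean conditional on the start is start * exp(rate * time). The stand-alone check below is a sketch with placeholder values for s0, rate, vol and time (stand-ins for the attributes referenced above), confirming that this lognormal parameterisation has the expected mean.

import numpy as np

# If S = exp(Normal(log(s0) + (rate - vol**2 / 2) * time, vol * sqrt(time))),
# then E[S] = s0 * exp(rate * time): the risk-neutral drift used above.
rng = np.random.default_rng(0)
s0, rate, vol, time = 100.0, 0.05, 0.25, 1.0   # placeholder parameter values

samples = np.exp(rng.normal(
    np.log(s0) + (rate - vol * vol / 2) * time,
    vol * np.sqrt(time),
    size=1_000_000
))
print(samples.mean())            # roughly 105.1, close to ...
print(s0 * np.exp(rate * time))  # ... the analytical mean 105.127
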
Code Example #7
    def test_unwrap_finite_horizon_MRP(self):
        finite = finite_horizon_MRP(self.finite_flip_flop, 10)

        def transition_for(_):
            return {
                True: Categorical({
                    (NonTerminal(True), 1.0): 0.3,
                    (NonTerminal(False), 2.0): 0.7
                }),
                False: Categorical({
                    (NonTerminal(True), 2.0): 0.7,
                    (NonTerminal(False), 1.0): 0.3
                })
            }

        unwrapped = unwrap_finite_horizon_MRP(finite)
        self.assertEqual(len(unwrapped), 10)

        expected_transitions = [transition_for(n) for n in range(10)]
        for time in range(9):
            got = unwrapped[time]
            expected = expected_transitions[time]
            distribution.assert_almost_equal(
                self, got[NonTerminal(True)],
                expected[True]
            )
            distribution.assert_almost_equal(
                self, got[NonTerminal(False)],
                expected[False]
            )

        distribution.assert_almost_equal(
            self, unwrapped[9][NonTerminal(True)],
            Categorical({
                (Terminal(True), 1.0): 0.3,
                (Terminal(False), 2.0): 0.7
            })
        )
        distribution.assert_almost_equal(
            self, unwrapped[9][NonTerminal(False)],
            Categorical({
                (Terminal(True), 2.0): 0.7,
                (Terminal(False), 1.0): 0.3
            })
        )
Code Example #8
 def states_sampler_func() -> NonTerminal[PriceAndShares]:
     price: float = self.initial_price_distribution.sample()
     rem: int = self.shares
     for i in range(t):
         sell: int = Choose(range(rem + 1)).sample()
         price = self.price_dynamics[i](PriceAndShares(
             price=price, shares=rem)).sample()
         rem -= sell
     return NonTerminal(PriceAndShares(price=price, shares=rem))
Code Example #9
        def action_mapping_for(s: WithTime[bool]) -> \
                ActionMapping[bool, WithTime[bool]]:
            same = NonTerminal(s.step_time())
            different = NonTerminal(dataclasses.replace(
                s.step_time(),
                state=not s.state
            ))

            return {
                True: Categorical({
                    (same, 1.0): 0.7,
                    (different, 2.0): 0.3
                }),
                False: Categorical({
                    (same, 1.0): 0.3,
                    (different, 2.0): 0.7
                })
            }
Code Example #10
 def states_sampler_func() -> NonTerminal[float]:
     wealth: float = self.initial_wealth_distribution.sample()
     for i in range(t):
         distr: Distribution[float] = self.risky_return_distributions[i]
         rate: float = self.riskless_returns[i]
         alloc: float = actions_distr.sample()
         wealth = alloc * (1 + distr.sample()) + \
             (wealth - alloc) * (1 + rate)
     return NonTerminal(wealth)
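
A sketch of how a zero-argument sampler like states_sampler_func is typically used: wrapping it in SampledDistribution turns it into a start-state distribution that the approximate-DP and RL routines can sample from. The import paths and the SampledDistribution constructor below are assumed to match the RL-book package (rl.distribution and rl.markov_process); the Gaussian sampler is a toy stand-in for the wealth simulation above.

import numpy as np
from rl.distribution import SampledDistribution   # assumed RL-book module
from rl.markov_process import NonTerminal          # assumed RL-book module

def toy_wealth_sampler() -> NonTerminal[float]:
    # Toy stand-in for states_sampler_func: draw a wealth level around 1.0.
    return NonTerminal(float(np.random.normal(loc=1.0, scale=0.1)))

start_states = SampledDistribution(toy_wealth_sampler)
print(start_states.sample())    # one NonTerminal wealth drawn from the sampler
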
Code Example #11
 def sr_sampler_func(wealth=wealth,
                     alloc=alloc) -> Tuple[State[float], float]:
     # Grow the risky allocation by the sampled risky return and the rest of
     # the wealth at the riskless rate for this time step.
     next_wealth: float = alloc * (1 + distr.sample()) \
         + (wealth.state - alloc) * (1 + rate)
     # Utility of wealth is rewarded only at the final step of the horizon.
     reward: float = utility_f(next_wealth) \
         if t == steps - 1 else 0.
     next_state: State[float] = Terminal(next_wealth) \
         if t == steps - 1 else NonTerminal(next_wealth)
     return (next_state, reward)
Code Example #12
 def sr_sampler_func(price=price,
                     exer=exer) -> Tuple[State[float], float]:
     if exer:
         return Terminal(0.), exer_payoff(price.state)
     else:
         next_price: float = np.exp(
             np.random.normal(
                 np.log(price.state) + (r - s * s / 2) * dt,
                 s * np.sqrt(dt)))
         return NonTerminal(next_price), 0.
Code Example #13
 def get_opt_vf_and_policy(self) -> \
         Iterator[Tuple[V[int], FiniteDeterministicPolicy[int, bool]]]:
     dt: float = self.dt()
     up_factor: float = np.exp(self.vol * np.sqrt(dt))
     up_prob: float = (np.exp(self.rate * dt) * up_factor - 1) / \
         (up_factor * up_factor - 1)
     return optimal_vf_and_policy(steps=[{
         NonTerminal(j): {
             True:
             Constant(
                 (Terminal(-1), self.payoff(i * dt, self.state_price(i,
                                                                     j)))),
             False:
             Categorical({
                 (NonTerminal(j + 1), 0.): up_prob,
                 (NonTerminal(j), 0.): 1 - up_prob
             })
         }
         for j in range(i + 1)
     } for i in range(self.num_steps + 1)],
                                  gamma=np.exp(-self.rate * dt))
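
The up_prob expression above is the standard risk-neutral up-move probability for a binomial tree with up factor u = exp(vol * sqrt(dt)) and down factor d = 1 / u: multiplying the numerator and denominator of (exp(rate * dt) - d) / (u - d) by u gives exactly the form used in the code. A stand-alone check with placeholder parameter values:

import numpy as np

# Check that the up_prob formula used above equals the standard risk-neutral
# probability q = (exp(rate * dt) - d) / (u - d) with u = exp(vol * sqrt(dt))
# and d = 1 / u, and that q reprices the one-step growth factor exp(rate * dt).
rate, vol, dt = 0.05, 0.2, 1 / 252      # placeholder parameter values
u = np.exp(vol * np.sqrt(dt))
d = 1 / u

q_standard = (np.exp(rate * dt) - d) / (u - d)
q_as_coded = (np.exp(rate * dt) * u - 1) / (u * u - 1)

assert np.isclose(q_standard, q_as_coded)
assert np.isclose(q_standard * u + (1 - q_standard) * d, np.exp(rate * dt))
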
Code Example #14
 def sr_sampler_func(
         p_r=p_r,
         sell=sell) -> Tuple[State[PriceAndShares], float]:
     p_s: PriceAndShares = PriceAndShares(price=p_r.state.price,
                                          shares=sell)
     next_price: float = dynamics[t](p_s).sample()
     next_rem: int = p_r.state.shares - sell
     next_state: PriceAndShares = PriceAndShares(
         price=next_price, shares=next_rem)
     reward: float = utility_f(
         sell * (p_r.state.price - price_diff[t](p_s)))
     return (NonTerminal(next_state), reward)
Code Example #15
 def sr_sampler_func(
         state=state,
         action=action) -> Tuple[State[AssetAllocState], float]:
     time, wealth = state.state
     next_wealth: float = action * (1 + distrs[time].sample()) \
         + (wealth - action) * (1 + rates[time])
     reward: float = utility_f(next_wealth) \
         if time == steps - 1 else 0.
     next_pair: AssetAllocState = (time + 1, next_wealth)
     next_state: State[AssetAllocState] = \
         Terminal(next_pair) if time == steps - 1 \
         else NonTerminal(next_pair)
     return (next_state, reward)
Code Example #16
    def test_evaluate(self):
        process = finite_horizon_MRP(self.finite_flip_flop, 10)
        vs = list(evaluate(unwrap_finite_horizon_MRP(process), gamma=1))

        self.assertEqual(len(vs), 10)

        self.assertAlmostEqual(vs[0][NonTerminal(True)], 17)
        self.assertAlmostEqual(vs[0][NonTerminal(False)], 17)

        self.assertAlmostEqual(vs[5][NonTerminal(True)], 17 / 2)
        self.assertAlmostEqual(vs[5][NonTerminal(False)], 17 / 2)

        self.assertAlmostEqual(vs[9][NonTerminal(True)], 17 / 10)
        self.assertAlmostEqual(vs[9][NonTerminal(False)], 17 / 10)
Code Example #17
 def explore(s: S, mdp=mdp) -> Iterable[A]:
     return mdp.actions(NonTerminal(s))
Code Example #18
    if is_call:
        opt_payoff = lambda _, x: max(x - strike, 0)
    else:
        opt_payoff = lambda _, x: max(strike - x, 0)

    opt_ex_bin_tree: OptimalExerciseBinTree = OptimalExerciseBinTree(
        spot_price=spot_price_val,
        payoff=opt_payoff,
        expiry=expiry_val,
        rate=rate_val,
        vol=vol_val,
        num_steps=num_steps_val)

    vf_seq, policy_seq = zip(*opt_ex_bin_tree.get_opt_vf_and_policy())
    ex_boundary: Sequence[Tuple[float, float]] = \
        opt_ex_bin_tree.option_exercise_boundary(policy_seq, is_call)
    time_pts, ex_bound_pts = zip(*ex_boundary)
    label = ("Call" if is_call else "Put") + " Option Exercise Boundary"
    plot_list_of_curves(list_of_x_vals=[time_pts],
                        list_of_y_vals=[ex_bound_pts],
                        list_of_colors=["b"],
                        list_of_curve_labels=[label],
                        x_label="Time",
                        y_label="Underlying Price",
                        title=label)

    european: float = opt_ex_bin_tree.european_price(is_call, strike)
    print(f"European Price = {european:.3f}")

    am_price: float = vf_seq[0][NonTerminal(0)]
    print(f"American Price = {am_price:.3f}")
Code Example #19
File: asset_alloc_pg.py (Project: shenoy1/RL-book)
 def start_states_distribution_func() -> NonTerminal[AssetAllocState]:
     wealth: float = self.initial_wealth_distribution.sample()
     return NonTerminal((0, wealth))
Code Example #20
File: asset_alloc_pg.py (Project: shenoy1/RL-book)
    actor_critic_adv_policies: Iterator[FunctionApprox[
        NonTerminal[AssetAllocState]]] = aad.actor_critic_advantage(
            q_feature_functions=q_ffs,
            q_dnn_spec=dnn_qvf_spec,
            v_feature_functions=v_ffs,
            v_dnn_spec=dnn_vf_spec
        )
    actor_critic_error_policies: Iterator[FunctionApprox[
        NonTerminal[AssetAllocState]]] = aad.actor_critic_td_error(
            feature_functions=v_ffs,
            q_value_dnn_spec=dnn_vf_spec
        )

    num_episodes: int = 20000

    x: Sequence[int] = range(num_episodes)
    y0: Sequence[float] = [base_alloc * (1 + r) ** (1 - steps)] * num_episodes
    y1: Sequence[float] = [p(NonTerminal((init_wealth, 0))) for p in
                           itertools.islice(reinforce_policies, num_episodes)]
    y2: Sequence[float] = [p(NonTerminal((init_wealth, 0))) for p in
                           itertools.islice(
                               actor_critic_policies,
                               0,
                               num_episodes * steps,
                               steps
                           )]
    y3: Sequence[float] = [p(NonTerminal((init_wealth, 0))) for p in
                           itertools.islice(
                               actor_critic_adv_policies,
                               0,
                               num_episodes * steps,
                               steps
                           )]
Code Example #21
 def optimal_action(s: S) -> A:
     _, a = q.argmax((NonTerminal(s), a) for a in actions(NonTerminal(s)))
     return a
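
Here q is a fitted Q-value approximation and argmax returns the (state, action) pair with the highest predicted value, from which only the action is kept. A library-free toy version of the same greedy selection, with a plain dict standing in for the fitted approximation:

from typing import Dict, List, Tuple

# q_table stands in for the fitted Q-value function; greedy selection is an
# argmax over (value, action) pairs for the fixed state s.
q_table: Dict[Tuple[str, str], float] = {
    ("s0", "left"): 1.0,
    ("s0", "right"): 2.5,
}

def toy_optimal_action(s: str, actions: List[str]) -> str:
    return max(((q_table[(s, a)], a) for a in actions), key=lambda p: p[0])[1]

assert toy_optimal_action("s0", ["left", "right"]) == "right"
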
Code Example #22
        func_approx=fa,
        initial_price_distribution=init_price_distrib)
    it_vf: Iterator[Tuple[ValueFunctionApprox[PriceAndShares],
                          DeterministicPolicy[PriceAndShares, int]]] = \
        ooe.backward_induction_vf_and_pi()

    state: PriceAndShares = PriceAndShares(price=init_price_mean,
                                           shares=num_shares)
    print("Backward Induction: VF And Policy")
    print("---------------------------------")
    print()
    for t, (vf, pol) in enumerate(it_vf):
        print(f"Time {t:d}")
        print()
        opt_sale: int = pol.action_for(state)
        val: float = vf(NonTerminal(state))
        print(f"Optimal Sales = {opt_sale:d}, Opt Val = {val:.3f}")
        print()
        print("Optimal Weights below:")
        print(vf.weights.weights)
        print()

    print("Analytical Solution")
    print("-------------------")
    print()

    for t in range(num_time_steps):
        print(f"Time {t:d}")
        print()
        left: int = num_time_steps - t
        opt_sale_anal: float = num_shares / num_time_steps
Code Example #23
    #     print("Weights")
    #     for w in v.weights:
    #         print(w.weights)
    #     print()

    it_qvf: Iterator[QValueFunctionApprox[float, float]] = \
        aad.backward_induction_qvf()

    print("Backward Induction on Q-Value Function")
    print("--------------------------------------")
    print()
    for t, q in enumerate(it_qvf):
        print(f"Time {t:d}")
        print()
        opt_alloc: float = max(
            ((q((NonTerminal(init_wealth), ac)), ac) for ac in alloc_choices),
            key=itemgetter(0)
        )[1]
        val: float = max(q((NonTerminal(init_wealth), ac))
                         for ac in alloc_choices)
        print(f"Opt Risky Allocation = {opt_alloc:.3f}, Opt Val = {val:.3f}")
        print("Optimal Weights below:")
        for wts in q.weights:
            pprint(wts.weights)
        print()

    print("Analytical Solution")
    print("-------------------")
    print()

    for t in range(steps):
Code Example #24
 def optimal_value_curve(self, func: FunctionApprox[NonTerminal[float]],
                         prices: Sequence[float]) -> np.ndarray:
     return func.evaluate([NonTerminal(p) for p in prices])
Code Example #25
    #     print("Weights")
    #     for w in v.weights:
    #         print(w.weights)
    #     print()

    it_qvf: Iterator[QValueFunctionApprox[float, float]] = \
        aad.backward_induction_qvf()

    print("Backward Induction on Q-Value Function")
    print("--------------------------------------")
    print()
    for t, q in enumerate(it_qvf):
        print(f"Time {t:d}")
        print()
        opt_alloc: float = max(
            ((q((NonTerminal(init_wealth), ac)), ac) for ac in alloc_choices),
            key=itemgetter(0))[1]
        val: float = max(
            q((NonTerminal(init_wealth), ac)) for ac in alloc_choices)
        print(f"Opt Risky Allocation = {opt_alloc:.3f}, Opt Val = {val:.3f}")
        print("Optimal Weights below:")
        for wts in q.weights:
            pprint(wts.weights)
        print()

    print("Analytical Solution")
    print("-------------------")
    print()

    for t in range(steps):
        print(f"Time {t:d}")
Code Example #26
    for t, (v, p) in enumerate(it_vf):
        print(f"Time {t:d}")
        print()

        if t == 0 or t == int(num_steps_val / 2) or t == num_steps_val - 1:
            exer_curve: np.ndarray = opt_ex_bi.exercise_curve(prices=prices)
            opt_val_curve: np.ndarray = opt_ex_bi.optimal_value_curve(
                func=v, prices=prices)
            plt.plot(prices, opt_val_curve, "r", prices, exer_curve, "b")
            time: float = t * expiry_val / num_steps_val
            plt.title(f"OptVal and Exercise Curves for Time = {time:.3f}")
            plt.show()

        all_funcs.append(v)

        opt_alloc: float = p.action_for(spot_price_val)
        val: float = v(NonTerminal(spot_price_val))
        print(f"Opt Action = {opt_alloc}, Opt Val = {val:.3f}")
        print()

    ex_bound: Sequence[float] = opt_ex_bi.put_option_exercise_boundary(
        all_funcs, strike)
    plt.plot(range(num_steps_val + 1), ex_bound)
    plt.title("Exercise Boundary")
    plt.show()

    print("European Put Price")
    print("------------------")
    print()
    print(opt_ex_bi.european_put_price(strike=strike))
Code Example #27
    actor_critic_adv_policies: Iterator[FunctionApprox[
        NonTerminal[AssetAllocState]]] = aad.actor_critic_advantage(
            q_feature_functions=q_ffs,
            q_dnn_spec=dnn_qvf_spec,
            v_feature_functions=v_ffs,
            v_dnn_spec=dnn_vf_spec)
    actor_critic_error_policies: Iterator[FunctionApprox[
        NonTerminal[AssetAllocState]]] = aad.actor_critic_td_error(
            feature_functions=v_ffs, q_value_dnn_spec=dnn_vf_spec)

    num_episodes: int = 50000

    x: Sequence[int] = range(num_episodes)
    y0: Sequence[float] = [base_alloc * (1 + r)**(1 - steps)] * num_episodes
    y1: Sequence[float] = [
        p(NonTerminal((init_wealth, 0)))
        for p in itertools.islice(reinforce_policies, num_episodes)
    ]
    y2: Sequence[float] = [
        p(NonTerminal((init_wealth, 0)))
        for p in itertools.islice(actor_critic_policies, 0, num_episodes *
                                  steps, steps)
    ]
    y3: Sequence[float] = [
        p(NonTerminal((init_wealth, 0)))
        for p in itertools.islice(actor_critic_adv_policies, 0, num_episodes *
                                  steps, steps)
    ]
    y4: Sequence[float] = [
        p(NonTerminal((init_wealth, 0))) for p in itertools.islice(
            actor_critic_error_policies, 0, num_episodes * steps, steps)