def get_dnn_model() -> DNNApprox[Triple]:
    ffs = feature_functions()
    ag = adam_gradient()

    def relu(arg: np.ndarray) -> np.ndarray:
        return np.vectorize(lambda x: x if x > 0. else 0.)(arg)

    def relu_deriv(res: np.ndarray) -> np.ndarray:
        return np.vectorize(lambda x: 1. if x > 0. else 0.)(res)

    def identity(arg: np.ndarray) -> np.ndarray:
        return arg

    def identity_deriv(res: np.ndarray) -> np.ndarray:
        return np.ones_like(res)

    ds = DNNSpec(
        neurons=[2],
        bias=True,
        hidden_activation=relu,
        hidden_activation_deriv=relu_deriv,
        output_activation=identity,
        output_activation_deriv=identity_deriv
    )
    return DNNApprox.create(
        feature_functions=ffs,
        dnn_spec=ds,
        adam_gradient=ag,
        regularization_coeff=0.05
    )
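# Illustrative sketch (not from the original source): a minimal example of how
# the DNNApprox returned by get_dnn_model might be fit incrementally, relying
# only on the update method used in the DQL training loop later in this
# section. The function name, its parameters, and the assumption that Triple
# is a 3-tuple of floats are hypothetical.
def sketch_train_dnn_model(
    xy_pairs: Sequence[Tuple[Triple, float]],
    num_updates: int
) -> DNNApprox[Triple]:
    fa: DNNApprox[Triple] = get_dnn_model()
    for _ in range(num_updates):
        # each update takes one Adam gradient step on the given (x, y) pairs
        fa = fa.update(xy_pairs)
    return fa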
def fitted_dql_put_option(
    obj: OptimalExerciseRL,
    strike: float,
    expiry: float,
    training_data: Sequence[TrainingDataType],
    training_iters: int
) -> DNNApprox[Tuple[float, float]]:
    features: List[Callable[[Tuple[float, float]], float]] = [
        lambda t_s: 1.,
        lambda t_s: t_s[0] / expiry,
        lambda t_s: t_s[1] / strike,
        lambda t_s: t_s[0] * t_s[1] / (expiry * strike)
    ]

    ds: DNNSpec = DNNSpec(
        neurons=[1],
        bias=True,
        hidden_activation=lambda x: np.log(1 + np.exp(-x)),
        hidden_activation_deriv=lambda y: np.exp(-y) - 1,
        # hidden_activation=lambda x: np.vectorize(
        #     lambda y: y if y > 0. else 0.
        # )(x),
        # hidden_activation_deriv=lambda x: np.vectorize(
        #     lambda y: 1. if y > 0. else 0.
        # )(x),
        output_activation=lambda x: x,
        output_activation_deriv=lambda y: np.ones_like(y)
    )

    dql_reg: float = 1e-6

    dnn_approx: DNNApprox[Tuple[float, float]] = obj.dnn_func_approx(
        features=features,
        ds=ds,
        reg=dql_reg
    )

    return obj.train_dql(
        training_data=training_data,
        init_fa=dnn_approx,
        training_iters=training_iters
    )
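# Illustrative sketch (not from the original source): a hypothetical wrapper
# showing how fitted_dql_put_option might be invoked and its output queried.
# The OptimalExerciseRL instance and training data are taken as arguments
# rather than constructed here; the evaluate call mirrors its use elsewhere in
# this section and returns the fitted continuation value at (time, price) =
# (0, spot).
def sketch_price_with_dql(
    obj: OptimalExerciseRL,
    strike: float,
    expiry: float,
    spot: float,
    training_data: Sequence[TrainingDataType],
    training_iters: int
) -> float:
    fitted_dql: DNNApprox[Tuple[float, float]] = fitted_dql_put_option(
        obj=obj,
        strike=strike,
        expiry=expiry,
        training_data=training_data,
        training_iters=training_iters
    )
    return fitted_dql.evaluate([(0., spot)])[0]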
        strike,
        sigma
    )

    if is_call:
        opt_payoff = lambda x: max(x - strike, 0)
    else:
        opt_payoff = lambda x: max(strike - x, 0)

    feature_funcs: Sequence[Callable[[Tuple[float, float]], float]] = [
        lambda _: 1.,
        lambda w_x: w_x[0]
    ]

    dnn: DNNSpec = DNNSpec(
        neurons=[],
        bias=False,
        hidden_activation=lambda x: x,
        hidden_activation_deriv=lambda y: np.ones_like(y),
        output_activation=lambda x: -np.exp(-x),
        output_activation_deriv=lambda y: -y
    )

    american_option: AmericanOption = AmericanOption(
        asset_price_distribution=asset_price_distribution,
        payoff=opt_payoff,
        expiry=expiry_val,
        dnn_spec=dnn,
        feature_functions=feature_funcs
    )

    print("Using Method 1")
    # Note: Method 1 does not always work; it works more reliably when the
    # line below is executed separately from the rest.
    it_qvf: Iterator[DNNApprox[Tuple[float, float]]] = \
        american_option.backward_induction_qvf()

    print("Backward Induction on Q-Value Function")
    print("--------------------------------------")
def fitted_dql_put_option(
    expiry: float,
    num_steps: int,
    num_paths: int,
    spot_price: float,
    spot_price_frac: float,
    rate: float,
    vol: float,
    strike: float,
    training_iters: int
) -> DNNApprox[Tuple[float, float]]:
    reg_coeff: float = 1e-2
    neurons: Sequence[int] = [6]

    # features: List[Callable[[Tuple[float, float]], float]] = [
    #     lambda t_s: 1.,
    #     lambda t_s: t_s[0] / expiry,
    #     lambda t_s: t_s[1] / strike,
    #     lambda t_s: t_s[0] * t_s[1] / (expiry * strike)
    # ]

    num_laguerre: int = 2
    ident: np.ndarray = np.eye(num_laguerre)
    features: List[Callable[[Tuple[float, float]], float]] = [lambda _: 1.]
    features += [(lambda t_s, i=i: np.exp(-t_s[1] / (2 * strike)) *
                  lagval(t_s[1] / strike, ident[i]))
                 for i in range(num_laguerre)]
    features += [
        lambda t_s: np.cos(-t_s[0] * np.pi / (2 * expiry)),
        lambda t_s: np.log(expiry - t_s[0]) if t_s[0] != expiry else 0.,
        lambda t_s: (t_s[0] / expiry) ** 2
    ]

    ds: DNNSpec = DNNSpec(
        neurons=neurons,
        bias=True,
        hidden_activation=lambda x: np.log(1 + np.exp(-x)),
        hidden_activation_deriv=lambda y: np.exp(-y) - 1,
        output_activation=lambda x: x,
        output_activation_deriv=lambda y: np.ones_like(y)
    )
    fa: DNNApprox[Tuple[float, float]] = DNNApprox.create(
        feature_functions=features,
        dnn_spec=ds,
        adam_gradient=AdamGradient(
            learning_rate=0.1,
            decay1=0.9,
            decay2=0.999
        ),
        regularization_coeff=reg_coeff
    )

    dt: float = expiry / num_steps
    gamma: float = np.exp(-rate * dt)

    training_data: Sequence[TrainingDataType] = training_sim_data(
        expiry=expiry,
        num_steps=num_steps,
        num_paths=num_paths,
        spot_price=spot_price,
        spot_price_frac=spot_price_frac,
        rate=rate,
        vol=vol
    )

    # Q-learning updates: sample an atomic experience, form the one-step
    # bootstrapped target, and take one Adam gradient step.
    for _ in range(training_iters):
        t_ind, s, s1 = training_data[randrange(len(training_data))]
        t = t_ind * dt
        x_val: Tuple[float, float] = (t, s)
        # exercise (payoff) value at the next state ...
        val: float = max(strike - s1, 0)
        if t_ind < num_steps - 1:
            # ... versus the continuation value from the current approximation
            val = max(val, fa.evaluate([(t + dt, s1)])[0])
        y_val: float = gamma * val
        fa = fa.update([(x_val, y_val)])
        # for w in fa.weights:
        #     pprint(w.weights)

    return fa
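# Illustrative driver (not from the original source): the parameter values
# below are hypothetical, chosen only for demonstration; fitted_dql_put_option
# and its dependencies are assumed to be defined in this module as above. The
# fitted DNNApprox is queried at (t, s) = (0, spot_price), which approximates
# the time-0 continuation value (and hence the price) of the American put.
if __name__ == '__main__':
    fa_example: DNNApprox[Tuple[float, float]] = fitted_dql_put_option(
        expiry=1.0,
        num_steps=200,
        num_paths=5000,
        spot_price=100.0,
        spot_price_frac=0.0,
        rate=0.05,
        vol=0.25,
        strike=100.0,
        training_iters=100000
    )
    print(f"DQL price estimate: {fa_example.evaluate([(0., 100.0)])[0]:.3f}")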
        alloc: float = base_alloc / growth
        print(f"Time {t:d}: Optimal Risky Allocation = {alloc:.3f}")
    print()

    risky_ret: Sequence[Gaussian] = [Gaussian(μ=μ, σ=σ) for _ in range(steps)]
    riskless_ret: Sequence[float] = [r for _ in range(steps)]
    utility_function: Callable[[float], float] = lambda x: -np.exp(-a * x) / a
    policy_feature_funcs: Sequence[Callable[[AssetAllocState], float]] = [
        lambda w_t: (1 + r) ** w_t[1]
    ]
    init_wealth_distr: Gaussian = Gaussian(μ=init_wealth, σ=init_wealth_stdev)
    policy_mean_dnn_spec: DNNSpec = DNNSpec(
        neurons=[],
        bias=False,
        hidden_activation=lambda x: x,
        hidden_activation_deriv=lambda y: np.ones_like(y),
        output_activation=lambda x: x,
        output_activation_deriv=lambda y: np.ones_like(y)
    )

    aad: AssetAllocPG = AssetAllocPG(
        risky_return_distributions=risky_ret,
        riskless_returns=riskless_ret,
        utility_func=utility_function,
        policy_feature_funcs=policy_feature_funcs,
        policy_mean_dnn_spec=policy_mean_dnn_spec,
        policy_stdev=policy_stdev,
        initial_wealth_distribution=init_wealth_distr
    )

    reinforce_policies: Iterator[FunctionApprox[
    risky_ret: Sequence[Gaussian] = [Gaussian(μ=μ, σ=σ) for _ in range(steps)]
    riskless_ret: Sequence[float] = [r for _ in range(steps)]
    utility_function: Callable[[float], float] = lambda x: -np.exp(-a * x) / a
    alloc_choices: Sequence[float] = np.linspace(
        2 / 3 * base_alloc,
        4 / 3 * base_alloc,
        11
    )
    feature_funcs: Sequence[Callable[[Tuple[float, float]], float]] = [
        lambda _: 1.,
        lambda w_x: w_x[0],
        lambda w_x: w_x[1],
        lambda w_x: w_x[1] * w_x[1]
    ]
    dnn: DNNSpec = DNNSpec(
        neurons=[],
        bias=False,
        hidden_activation=lambda x: x,
        hidden_activation_deriv=lambda y: np.ones_like(y),
        output_activation=lambda x: -np.sign(a) * np.exp(-x),
        # derivative of -sign(a) * exp(-x), expressed in terms of the output y
        output_activation_deriv=lambda y: -y
    )
    init_wealth_distr: Gaussian = Gaussian(μ=init_wealth, σ=init_wealth_var)

    aad: AssetAllocDiscrete = AssetAllocDiscrete(
        risky_return_distributions=risky_ret,
        riskless_returns=riskless_ret,
        utility_func=utility_function,
        risky_alloc_choices=alloc_choices,
        feature_functions=feature_funcs,
        dnn_spec=dnn,
        initial_wealth_distribution=init_wealth_distr
    )

    # vf_ff: Sequence[Callable[[float], float]] = [lambda _: 1., lambda w: w]
    # it_vf: Iterator[Tuple[DNNApprox[float], Policy[float, float]]] = \