def get_qvf_func_approx(self) -> DNNApprox[Tuple[float, float]]:
    """Build the DNN approximator for the Q-value function.

    Uses this object's configured feature functions and DNN spec,
    trained with Adam at the module's standard hyperparameters.
    """
    adam: AdamGradient = AdamGradient(
        learning_rate=0.1,
        decay1=0.9,
        decay2=0.999
    )
    return DNNApprox.create(
        feature_functions=self.feature_functions,
        dnn_spec=self.dnn_spec,
        adam_gradient=adam
    )
def get_vf_func_approx(
    self,
    ff: Sequence[Callable[[float], float]]
) -> DNNApprox[float]:
    """Build a DNN approximator for the value function.

    :param ff: feature functions mapping a float state to a feature value.
    :return: a DNNApprox over float states using this object's DNN spec.
    """
    adam: AdamGradient = AdamGradient(
        learning_rate=0.1,
        decay1=0.9,
        decay2=0.999
    )
    return DNNApprox.create(
        feature_functions=ff,
        dnn_spec=self.dnn_spec,
        adam_gradient=adam
    )
def policy_mean_approx(self) -> \
        FunctionApprox[NonTerminal[AssetAllocState]]:
    """Build the DNN approximator for the policy's mean.

    Each raw feature function (defined on the underlying state) is
    lifted to operate on NonTerminal-wrapped states.
    """
    adam: AdamGradient = AdamGradient(
        learning_rate=0.003,
        decay1=0.9,
        decay2=0.999
    )

    def lift(g) -> Callable[[NonTerminal[AssetAllocState]], float]:
        # Calling this factory freezes g per feature, avoiding the
        # classic late-binding-closure pitfall.
        return lambda st: g(st.state)

    ffs: List[Callable[[NonTerminal[AssetAllocState]], float]] = \
        [lift(g) for g in self.policy_feature_funcs]
    return DNNApprox.create(
        feature_functions=ffs,
        dnn_spec=self.policy_mean_dnn_spec,
        adam_gradient=adam
    )
def get_qvf_func_approx(self) -> \
        DNNApprox[Tuple[NonTerminal[float], float]]:
    """Build the DNN approximator for the Q-value function over
    (state, action) pairs with NonTerminal-wrapped states.

    Raw feature functions operate on plain (state, action) pairs; each
    is adapted here to unwrap the NonTerminal state first.
    """
    adam: AdamGradient = AdamGradient(
        learning_rate=0.1,
        decay1=0.9,
        decay2=0.999
    )

    def unwrap(g) -> Callable[[Tuple[NonTerminal[float], float]], float]:
        # Factory call freezes g per feature (late-binding safe).
        return lambda pair: g((pair[0].state, pair[1]))

    ffs: List[Callable[[Tuple[NonTerminal[float], float]], float]] = \
        [unwrap(g) for g in self.feature_functions]
    return DNNApprox.create(
        feature_functions=ffs,
        dnn_spec=self.dnn_spec,
        adam_gradient=adam
    )
def adam_gradient():
    """Return the Adam hyperparameters shared by this module's learners."""
    return AdamGradient(
        learning_rate=0.1,
        decay1=0.9,
        decay2=0.999
    )
# NOTE(review): script-level setup comparing MC vs TD prediction with a
# linear function approximation; `ffs` is a one-hot (tabular) feature per
# non-terminal state, so the linear approx is equivalent to a table.
# MC and TD use separate Adam learning rates (0.05 vs 0.003).
# The trailing assignment to `it_mc` calls mc_prediction_learning_rate(
# whose arguments continue beyond this chunk — left untouched.
# Presumably si_mrp/gamma/nt_states are defined earlier in the file —
# TODO confirm against the full source.
true_vf: np.ndarray = si_mrp.get_value_function_vec(gamma=gamma) mc_episode_length_tol: float = 1e-6 num_episodes = 10000 td_episode_length: int = 100 initial_learning_rate: float = 0.03 half_life: float = 1000.0 exponent: float = 0.5 ffs: Sequence[Callable[[InventoryState], float]] = [(lambda x, s=s: float(x == s)) for s in nt_states] mc_ag: AdamGradient = AdamGradient(learning_rate=0.05, decay1=0.9, decay2=0.999) td_ag: AdamGradient = AdamGradient(learning_rate=0.003, decay1=0.9, decay2=0.999) mc_func_approx: LinearFunctionApprox[ InventoryState] = LinearFunctionApprox.create(feature_functions=ffs, adam_gradient=mc_ag) td_func_approx: LinearFunctionApprox[ InventoryState] = LinearFunctionApprox.create(feature_functions=ffs, adam_gradient=td_ag) it_mc: Iterable[FunctionApprox[InventoryState]] = mc_prediction_learning_rate(
# NOTE(review): interior of a sampled-distribution class (header outside
# this chunk) followed by script-level code that uses it. The sampler
# draws a uniform integer time in [0, expiry] and a normal price with
# mean μ and std σ. The `expiry` attribute is declared but __init__ never
# assigns self.expiry — looks like an omission; confirm against callers.
# The two feature lambdas project out the (time, price) components.
# "This second method does not really work" per the original author — the
# value_iteration result is left unprinted deliberately.
μ: float σ: float expiry: int def __init__(self, μ: float, σ: float, expiry: int, expectation_samples: int = 10000): self.μ = μ self.σ = σ super().__init__(sampler=lambda: (np.random.randint(expiry + 1), np.random.normal(loc=self.μ, scale=self.σ)), expectation_samples=expectation_samples) nt_states_distribution = InitialDistrib(strike, sigma, expiry_val) ag = AdamGradient(learning_rate=0.5, decay1=0.9, decay2=0.999) ffs = [lambda x: x[0], lambda x: x[1]] lfa = LinearFunctionApprox.create(feature_functions=ffs, adam_gradient=ag, regularization_coeff=0.001, direct_solve=True) solution_2 = value_iteration(mdp, 1, lfa, nt_states_distribution, 100) """ for i in solution_2: print(i) """ #This second method does not really work
def fitted_dql_put_option(
    expiry: float,
    num_steps: int,
    num_paths: int,
    spot_price: float,
    spot_price_frac: float,
    rate: float,
    vol: float,
    strike: float,
    training_iters: int
) -> DNNApprox[Tuple[float, float]]:
    """Fit a DNN approximation of an American put's continuation value
    by fitted deep Q-learning on simulated (time, price) transitions.

    :param expiry: option expiry time (years)
    :param num_steps: number of discrete exercise steps over the expiry
    :param num_paths: number of simulated price paths for training data
    :param spot_price: initial spot price for the simulation
    :param spot_price_frac: dispersion fraction around the spot price
        (passed through to training_sim_data)
    :param rate: risk-free rate used for per-step discounting
    :param vol: volatility of the simulated price process
    :param strike: put strike
    :param training_iters: number of single-sample Adam updates
    :return: DNNApprox mapping (time, price) to continuation value
    """
    reg_coeff: float = 1e-2
    neurons: Sequence[int] = [6]
    # features: List[Callable[[Tuple[float, float]], float]] = [
    #     lambda t_s: 1.,
    #     lambda t_s: t_s[0] / expiry,
    #     lambda t_s: t_s[1] / strike,
    #     lambda t_s: t_s[0] * t_s[1] / (expiry * strike)
    # ]
    num_laguerre: int = 2
    ident: np.ndarray = np.eye(num_laguerre)
    # Feature set: constant, weighted Laguerre polynomials of moneyness,
    # and three time features. `i=i` freezes the loop variable in each
    # lambda (late-binding-closure pitfall).
    features: List[Callable[[Tuple[float, float]], float]] = [lambda _: 1.]
    features += [(lambda t_s, i=i: np.exp(-t_s[1] / (2 * strike)) * lagval(
        t_s[1] / strike, ident[i])) for i in range(num_laguerre)]
    features += [
        lambda t_s: np.cos(-t_s[0] * np.pi / (2 * expiry)),
        # log(time-to-expiry); guarded at t == expiry where it diverges.
        lambda t_s: np.log(expiry - t_s[0]) if t_s[0] != expiry else 0.,
        lambda t_s: (t_s[0] / expiry)**2
    ]
    # Hidden activation g(x) = log(1 + e^{-x}); its derivative expressed
    # as a function of y = g(x) is e^{-y} - 1 (consistent pair).
    ds: DNNSpec = DNNSpec(
        neurons=neurons,
        bias=True,
        hidden_activation=lambda x: np.log(1 + np.exp(-x)),
        hidden_activation_deriv=lambda y: np.exp(-y) - 1,
        output_activation=lambda x: x,
        output_activation_deriv=lambda y: np.ones_like(y)
    )
    fa: DNNApprox[Tuple[float, float]] = DNNApprox.create(
        feature_functions=features,
        dnn_spec=ds,
        adam_gradient=AdamGradient(
            learning_rate=0.1,
            decay1=0.9,
            decay2=0.999
        ),
        regularization_coeff=reg_coeff
    )
    dt: float = expiry / num_steps
    gamma: float = np.exp(-rate * dt)
    training_data: Sequence[TrainingDataType] = training_sim_data(
        expiry=expiry,
        num_steps=num_steps,
        num_paths=num_paths,
        spot_price=spot_price,
        spot_price_frac=spot_price_frac,
        rate=rate,
        vol=vol
    )
    # Q-learning loop: sample one transition (t_ind, s) -> s1 at random,
    # bootstrap the target from the current approximation, take one
    # Adam step on that single (x, y) pair.
    for _ in range(training_iters):
        t_ind, s, s1 = training_data[randrange(len(training_data))]
        t = t_ind * dt
        x_val: Tuple[float, float] = (t, s)
        # Immediate exercise payoff at the next state.
        val: float = max(strike - s1, 0)
        if t_ind < num_steps - 1:
            # Non-terminal next step: max of exercise vs continuation.
            val = max(val, fa.evaluate([(t + dt, s1)])[0])
        # Discount one step back to form the regression target.
        y_val: float = gamma * val
        fa = fa.update([(x_val, y_val)])
    # for w in fa.weights:
    #     pprint(w.weights)
    return fa
def vf_adam_gradient(self) -> AdamGradient:
    """Adam hyperparameters for the value-function approximator."""
    lr: float = 0.003
    return AdamGradient(learning_rate=lr, decay1=0.9, decay2=0.999)