import unittest

from rl.distribution import Choose, SampledDistribution


class TestDistribution(unittest.TestCase):
    def setUp(self):
        self.finite = Choose(range(0, 6))
        self.sampled = SampledDistribution(
            lambda: self.finite.sample(),
            100000
        )

    def test_expectation(self):
        expected_finite = self.finite.expectation(lambda x: x)
        expected_sampled = self.sampled.expectation(lambda x: x)
        self.assertLess(abs(expected_finite - expected_sampled), 0.02)

    def test_sample_n(self):
        samples = self.sampled.sample_n(10)
        self.assertEqual(len(samples), 10)
        self.assertTrue(all(0 <= s < 6 for s in samples))
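# Minimal runner for the test case above, using the standard unittest entry
# point (assumes the class lives in its own module):
if __name__ == '__main__':
    unittest.main()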
def start_states_distribution(self) -> \
        SampledDistribution[NonTerminal[AssetAllocState]]:
    def start_states_distribution_func() -> NonTerminal[AssetAllocState]:
        wealth: float = self.initial_wealth_distribution.sample()
        return NonTerminal((0, wealth))

    return SampledDistribution(sampler=start_states_distribution_func)
def act(self, state: InventoryState) -> SampledDistribution[int]:
    def action_func(state=state) -> int:
        reorder_point_sample: int = \
            np.random.poisson(self.reorder_point_poisson_mean)
        return max(reorder_point_sample - state.inventory_position(), 0)

    return SampledDistribution(action_func)
def transition(self, state: NonTerminal[bool]) -> \
        Distribution[State[bool]]:
    def next_state(state=state):
        switch_states = Bernoulli(self.p).sample()
        next_st: bool = not state.state if switch_states else state.state
        return NonTerminal(next_st)

    return SampledDistribution(next_state)
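# A usage sketch for the transition above: simulate a short trajectory by
# repeatedly sampling the next-state distribution. `FlipFlop` is a
# hypothetical name for the enclosing class, assumed to take the switch
# probability p in its constructor.
process = FlipFlop(p=0.7)  # hypothetical constructor
state: NonTerminal[bool] = NonTerminal(True)
for _ in range(10):
    state = process.transition(state).sample()  # draw one next state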
def transition_reward(
    self,
    state: StateMP1
) -> SampledDistribution[Tuple[StateMP1, float]]:
    def sample_next_state_reward(state=state) -> \
            Tuple[StateMP1, float]:
        next_state = self.transition(state).sample()
        reward: float = stock_reward(state.price)
        return next_state, reward

    return SampledDistribution(sample_next_state_reward)
def step(
    self,
    wealth: float,
    alloc: float
) -> SampledDistribution[Tuple[float, float]]:
    def sr_sampler_func(wealth=wealth, alloc=alloc) -> Tuple[float, float]:
        next_wealth: float = alloc * (1 + distr.sample()) \
            + (wealth - alloc) * (1 + rate)
        reward: float = utility_f(next_wealth) \
            if t == steps - 1 else 0.
        return (next_wealth, reward)

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=1000
    )
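# Sketch of using the step above: estimate the expected reward for a given
# wealth and allocation via the distribution's expectation method (shown in
# the tests above). `mdp` is an assumed instance of the enclosing class,
# with distr, rate, utility_f, t and steps in its closure.
sr_distr = mdp.step(wealth=100.0, alloc=60.0)
expected_reward: float = sr_distr.expectation(lambda sr: sr[1])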
def transition(self, state: NonTerminal[S]) -> Distribution[State[S]]:
    '''Transitions the Markov Reward Process, ignoring the generated
    reward (which makes this just a normal Markov Process).
    '''
    distribution = self.transition_reward(state)

    def next_state(distribution=distribution):
        next_s, _ = distribution.sample()
        return next_s

    return SampledDistribution(next_state)
def get_states_distribution(self, t: int) -> \
        SampledDistribution[NonTerminal[PriceAndShares]]:
    def states_sampler_func() -> NonTerminal[PriceAndShares]:
        price: float = self.initial_price_distribution.sample()
        rem: int = self.shares
        for i in range(t):
            sell: int = Choose(range(rem + 1)).sample()
            price = self.price_dynamics[i](PriceAndShares(
                price=price,
                shares=rem
            )).sample()
            rem -= sell
        return NonTerminal(PriceAndShares(price=price, shares=rem))

    return SampledDistribution(states_sampler_func)
def transition_reward(
    self,
    state: StateMP1
) -> SampledDistribution[Tuple[StateMP1, float]]:
    def sample_next_state_reward(state=state) -> Tuple[StateMP1, float]:
        up_p = self.up_prob(state)
        if np.random.random() < up_p:
            next_state: StateMP1 = StateMP1(state.price + 1)
        else:
            next_state = StateMP1(state.price - 1)
        reward: float = self.f(next_state)
        return next_state, reward

    return SampledDistribution(sample_next_state_reward)
def transition_reward(self, state: bool) -> Distribution[Tuple[bool, float]]:
    def next_state(state=state):
        switch_states = Bernoulli(self.p).sample()
        if switch_states:
            next_s = not state
            reward = 1 if state else 0.5
            return next_s, reward
        else:
            return state, 0.5

    return SampledDistribution(next_state)
def transition_reward(
    self,
    state: PriceState
) -> SampledDistribution[Tuple[PriceState, float]]:
    def sample_next_state_reward(state=state) -> Tuple[PriceState, float]:
        up_prob = get_logistic_func(self.alpha1)(
            self.level_param - state.price
        )
        up_move: int = binomial(1, up_prob, 1)[0]
        next_state: PriceState = PriceState(
            price=state.price + up_move * 2 - 1
        )
        reward: float = self.reward_function(next_state)
        return next_state, reward

    return SampledDistribution(sample_next_state_reward)
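# Sketch: sample a short price path from the logistic mean-reverting
# dynamics above. `process` is an assumed instance of the enclosing class.
s = PriceState(price=100)
for _ in range(5):
    s, _ = process.transition_reward(s).sample()  # discard the reward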
def transition_reward(self, state: S) \
        -> Optional[SampledDistribution[Tuple[S, float]]]:
    action_map: Optional[ActionMapping[A, S]] = mapping[state]
    if action_map is None:
        return None
    else:
        def next_pair(action_map=action_map):
            action: A = policy.act(state).sample()
            return action_map[action].sample()

        return SampledDistribution(next_pair)
def step(
    self,
    state: InventoryState,
    order: int
) -> SampledDistribution[Tuple[InventoryState, float]]:
    def sample_next_state_reward(
        state=state,
        order=order
    ) -> Tuple[InventoryState, float]:
        demand_sample: int = np.random.poisson(self.poisson_lambda)
        ip: int = state.inventory_position()
        next_state: InventoryState = InventoryState(
            max(ip - demand_sample, 0),
            order
        )
        reward: float = - self.holding_cost * state.on_hand \
            - self.stockout_cost * max(demand_sample - ip, 0)
        return next_state, reward

    return SampledDistribution(sample_next_state_reward)
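# Sketch: Monte Carlo estimate of the expected one-step reward of ordering
# 2 units from a given inventory state, using the expectation method shown
# in the tests above. The (on_hand, on_order) constructor order for
# InventoryState is inferred from the snippet and is an assumption.
sr = mdp.step(InventoryState(1, 1), order=2)  # `mdp` assumed
expected_reward: float = sr.expectation(lambda s_r: s_r[1])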
def transition(self, state: S) -> Optional[Distribution[S]]:
    """Transitions the Markov Reward Process, ignoring the generated
    reward (which makes this just a normal Markov Process).
    """
    distribution = self.transition_reward(state)
    if distribution is None:
        return None

    def next_state(distribution=distribution):
        next_s, _ = distribution.sample()
        return next_s

    return SampledDistribution(next_state)
def transition_reward(self, state: NonTerminal[bool]) -> \
        Distribution[Tuple[State[bool], float]]:
    def next_state(state=state):
        switch_states = Bernoulli(self.p).sample()
        st: bool = state.state
        if switch_states:
            next_s: bool = not st
            reward = 1 if st else 0.5
            return NonTerminal(next_s), reward
        else:
            return NonTerminal(st), 0.5

    return SampledDistribution(next_state)
def transition(self, state: OrderBook) -> Optional[Distribution[OrderBook]]:
    descending_bids: PriceSizePairs = state.descending_bids
    ascending_asks: PriceSizePairs = state.ascending_asks
    if len(descending_bids) == 0 or len(ascending_asks) == 0:
        return None
    volume: int = 0
    count_orders: int = 0
    list_bid_amounts: List[float] = []
    list_ask_amounts: List[float] = []
    for i in descending_bids:
        volume += i.shares
        count_orders += 1
        list_bid_amounts.append(i.dollars)
    for i in ascending_asks:
        # shares (not dollars) contribute to traded volume
        volume += i.shares
        count_orders += 1
        list_ask_amounts.append(i.dollars)
    num_shares_by_model: int = volume // count_orders

    def sr_sampler_func(
        state=state,
        list_bid_amounts=list_bid_amounts,
        list_ask_amounts=list_ask_amounts,
        num_shares_by_model=num_shares_by_model
    ) -> OrderBook:
        if np.random.random() < self.prob_buy_order:
            if np.random.random() < self.prob_market_order:
                _, new_state = state.buy_market_order(num_shares_by_model)
            else:
                price = np.random.choice(list_bid_amounts)
                # limit orders also return a (transaction, new book) pair
                _, new_state = state.buy_limit_order(
                    price,
                    num_shares_by_model
                )
        else:
            if np.random.random() < self.prob_market_order:
                _, new_state = state.sell_market_order(num_shares_by_model)
            else:
                price = np.random.choice(list_ask_amounts)
                _, new_state = state.sell_limit_order(
                    price,
                    num_shares_by_model
                )
        return new_state

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=1000
    )
def step(
    self,
    state: float,
    action: bool
) -> Distribution[Tuple[float, float]]:
    if action:
        return Constant((state, payoffs(state)))
    else:
        def sr_sampler_func(
            state=state,
            action=action
        ) -> Tuple[float, float]:
            next_state_price: float = asset_distribution.sample()
            reward: float = 0
            return (next_state_price, reward)

        return SampledDistribution(
            sampler=sr_sampler_func,
            expectation_samples=1000
        )
def get_states_distribution(self, t: int) -> SampledDistribution[float]:
    actions_distr: Choose[float] = self.uniform_actions()

    def states_sampler_func() -> float:
        wealth: float = self.initial_wealth_distribution.sample()
        for i in range(t):
            distr: Distribution[float] = self.risky_return_distributions[i]
            rate: float = self.riskless_returns[i]
            alloc: float = actions_distr.sample()
            wealth = alloc * (1 + distr.sample()) + \
                (wealth - alloc) * (1 + rate)
        return wealth

    return SampledDistribution(states_sampler_func)
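# Sketch: estimate the mean wealth after 5 time steps under uniformly
# sampled allocations. `process` is an assumed instance of the enclosing
# class.
wealth_distr = process.get_states_distribution(5)
mean_wealth: float = wealth_distr.expectation(lambda w: w)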
def transition(self, state: OrderBook) -> Optional[Distribution[OrderBook]]:
    '''Given a state of the process, returns a distribution of the next
    states. Returning None means we are in a terminal state.
    '''
    def sampler_func(state=state) -> OrderBook:
        next_state = state.buy_market_order(self.bid_mkt_distr.sample())[1]
        next_state = next_state.sell_market_order(
            self.ask_mkt_distr.sample()
        )[1]
        for _ in range(self.bid_limit_num.sample()):
            # sample one (price, shares) pair and keep the updated book
            price, shares = self.bid_limit_distr.sample()
            next_state = next_state.buy_limit_order(price, shares)[1]
        for _ in range(self.ask_limit_num.sample()):
            price, shares = self.ask_limit_distr.sample()
            next_state = next_state.sell_limit_order(price, shares)[1]
        return next_state

    return SampledDistribution(sampler_func, expectation_samples=1000)
def step(
    self,
    wealth: NonTerminal[float],
    alloc: float
) -> SampledDistribution[Tuple[State[float], float]]:
    def sr_sampler_func(
        wealth=wealth,
        alloc=alloc
    ) -> Tuple[State[float], float]:
        next_wealth: float = alloc * (1 + distr.sample()) \
            + (wealth.state - alloc) * (1 + rate)
        reward: float = utility_f(next_wealth) \
            if t == steps - 1 else 0.
        next_state: State[float] = Terminal(next_wealth) \
            if t == steps - 1 else NonTerminal(next_wealth)
        return (next_state, reward)

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=1000
    )
def transition_reward(
    self,
    state: NonTerminal[InventoryState]
) -> SampledDistribution[Tuple[State[InventoryState], float]]:
    def sample_next_state_reward(state=state) -> \
            Tuple[State[InventoryState], float]:
        demand_sample: int = np.random.poisson(self.poisson_lambda)
        ip: int = state.state.inventory_position()
        next_state: InventoryState = InventoryState(
            max(ip - demand_sample, 0),
            max(self.capacity - ip, 0)
        )
        reward: float = - self.holding_cost * state.state.on_hand \
            - self.stockout_cost * max(demand_sample - ip, 0)
        return NonTerminal(next_state), reward

    return SampledDistribution(sample_next_state_reward)
def get_states_distribution(self, t: int) -> \
        SampledDistribution[PriceAndShares]:
    def states_sampler_func() -> PriceAndShares:
        price: float = self.initial_price_distribution.sample()
        rem: int = self.shares
        x: float = self.init_x_distrib.sample()
        for i in range(t):
            sell: int = Choose(set(range(rem + 1))).sample()
            price = self.price_dynamics[i](PriceAndShares(
                price=price,
                shares=rem,
                x=x
            )).sample()
            rem -= sell
            # evolve the AR(1) factor each step
            x = self.pho * x + Uniform().sample()
        return PriceAndShares(price=price, shares=rem, x=x)

    return SampledDistribution(states_sampler_func)
def step(
    self,
    price: NonTerminal[float],
    exer: bool
) -> SampledDistribution[Tuple[State[float], float]]:
    def sr_sampler_func(
        price=price,
        exer=exer
    ) -> Tuple[State[float], float]:
        if exer:
            return Terminal(0.), exer_payoff(price.state)
        else:
            next_price: float = np.exp(np.random.normal(
                np.log(price.state) + (r - s * s / 2) * dt,
                s * np.sqrt(dt)
            ))
            return NonTerminal(next_price), 0.

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=200
    )
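# Sketch: compare the immediate exercise payoff against a Monte Carlo
# estimate of the mean next price when continuing, using the 200
# expectation_samples configured above. `mdp` and `exer_payoff` follow the
# names in the snippet; the price level is illustrative.
price = NonTerminal(100.0)
exercise_value: float = exer_payoff(price.state)
avg_next_price: float = mdp.step(price, exer=False).expectation(
    lambda sr: sr[0].state
)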
def transition_reward(self, state: InventoryState) \
        -> SampledDistribution[Tuple[InventoryState, float]]:
    def sample_next_state_reward(
        mdp=mdp,
        state=state
    ) -> Tuple[InventoryState, float]:
        # sample the order inside the sampler so that each draw from this
        # distribution reflects a fresh draw from the (possibly stochastic)
        # policy
        order: int = policy.act(state).sample()
        demand_sample: int = np.random.poisson(mdp.poisson_lambda)
        ip: int = state.inventory_position()
        next_state: InventoryState = InventoryState(
            max(ip - demand_sample, 0),
            order
        )
        reward: float = - mdp.holding_cost * state.on_hand \
            - mdp.stockout_cost * max(demand_sample - ip, 0)
        return next_state, reward

    return SampledDistribution(sample_next_state_reward)
def step(
    self,
    p_r: PriceAndShares,
    sell: int
) -> SampledDistribution[Tuple[PriceAndShares, float]]:
    def sr_sampler_func(p_r=p_r, sell=sell) -> Tuple[PriceAndShares, float]:
        p_s: PriceAndShares = PriceAndShares(price=p_r.price, shares=sell)
        next_price: float = dynamics[t](p_s).sample()
        next_rem: int = p_r.shares - sell
        next_state: PriceAndShares = PriceAndShares(
            price=next_price,
            shares=next_rem
        )
        reward: float = utility_f(sell * (p_r.price - price_diff[t](p_s)))
        return (next_state, reward)

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=100
    )
def step(
    self,
    state: NonTerminal[AssetAllocState],
    action: float
) -> SampledDistribution[Tuple[State[AssetAllocState], float]]:
    def sr_sampler_func(
        state=state,
        action=action
    ) -> Tuple[State[AssetAllocState], float]:
        time, wealth = state.state
        next_wealth: float = action * (1 + distrs[time].sample()) \
            + (wealth - action) * (1 + rates[time])
        reward: float = utility_f(next_wealth) \
            if time == steps - 1 else 0.
        next_pair: AssetAllocState = (time + 1, next_wealth)
        next_state: State[AssetAllocState] = \
            Terminal(next_pair) if time == steps - 1 \
            else NonTerminal(next_pair)
        return (next_state, reward)

    return SampledDistribution(sampler=sr_sampler_func)
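# Sketch: roll out one full episode of the asset-allocation MDP above with
# a constant allocation, accumulating rewards until the Terminal state.
# `mdp` is an assumed instance of the enclosing class.
state: State[AssetAllocState] = NonTerminal((0, 100.0))
total_reward: float = 0.
while isinstance(state, NonTerminal):
    state, reward = mdp.step(state, 50.0).sample()
    total_reward += reward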
def step(
    self,
    state: Tuple[int, float],
    action: bool
) -> Optional[Distribution[Tuple[Tuple[int, float], float]]]:
    if state[0] > expiry_time or state[0] == -1:
        return None
    elif action:
        return Constant(((-1, state[1]), payoffs(state[1])))
    else:
        def sr_sampler_func(
            state=state,
            action=action
        ) -> Tuple[Tuple[int, float], float]:
            next_state_price: float = asset_distribution.sample()
            next_state_time = state[0] + 1
            reward: float = 0
            return ((next_state_time, next_state_price), reward)

        return SampledDistribution(
            sampler=sr_sampler_func,
            expectation_samples=1000
        )
def get_states_distribution(self, t: int) -> SampledDistribution[StateType]:
    spot_mean2: float = self.spot_price * self.spot_price
    spot_var: float = spot_mean2 * self.spot_price_frac * self.spot_price_frac
    log_mean: float = np.log(spot_mean2 / np.sqrt(spot_var + spot_mean2))
    log_stdev: float = np.sqrt(np.log(spot_var / spot_mean2 + 1))
    time: float = t * self.expiry / self.num_steps

    def states_sampler_func() -> StateType:
        start: float = np.random.lognormal(log_mean, log_stdev)
        price = np.exp(np.random.normal(
            np.log(start) + (self.rate - self.vol * self.vol / 2) * time,
            self.vol * np.sqrt(time)
        ))
        return (price, False)

    return SampledDistribution(states_sampler_func)
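# Sketch: the lognormal parameters above are moment-matched so that the
# start price has mean spot_price (exp(log_mean + log_stdev^2 / 2) reduces
# to spot_price). A quick Monte Carlo check at t=0, with `mdp` assumed:
mean_price: float = mdp.get_states_distribution(0).expectation(
    lambda ps: ps[0]
)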
def step(
    self,
    price_exer: StateType,
    exer: bool
) -> SampledDistribution[Tuple[StateType, float]]:
    def sr_sampler_func(
        price_exer=price_exer,
        exer=exer
    ) -> Tuple[StateType, float]:
        price, exercised = price_exer
        if exercised:
            ret = ((price, True), 0.)
        elif exer:
            ret = ((price, True), exer_payoff(price))
        else:
            next_price: float = np.exp(np.random.normal(
                np.log(price) + (r - s * s / 2) * dt,
                s * np.sqrt(dt)
            ))
            ret = ((next_price, False), 0.)
        return ret

    return SampledDistribution(
        sampler=sr_sampler_func,
        expectation_samples=200
    )
def step(
    self,
    state: State,
    action: Action
) -> Optional[Distribution[Tuple[State, float]]]:
    if state.t > self.T:
        return None

    def sampler_func() -> Tuple[State, float]:
        inventory = state.I
        pnl = state.W
        # probability the posted ask at Pa gets lifted in this interval
        prob_inventory_down: float = self.c * \
            np.exp(-self.k * (action.Pa - state.S)) * self.delta_t
        if inventory >= 1 and np.random.random() < prob_inventory_down:
            inventory -= 1
            pnl += action.Pa
        # probability the posted bid at Pb gets hit in this interval
        prob_inventory_up: float = self.c * \
            np.exp(-self.k * (state.S - action.Pb)) * self.delta_t
        if np.random.random() < prob_inventory_up:
            inventory += 1
            pnl -= action.Pb
        # mid-price moves up or down by sigma * sqrt(delta_t) with equal
        # probability
        if np.random.random() < 0.5:
            ob_mid_price: float = state.S + self.sigma * np.sqrt(self.delta_t)
        else:
            ob_mid_price = state.S - self.sigma * np.sqrt(self.delta_t)
        next_state = State(
            t=state.t + self.delta_t,
            S=ob_mid_price,
            W=pnl,
            I=inventory
        )
        if next_state.t >= self.T:
            reward = utility_func(
                next_state.W + next_state.I * next_state.S,
                self.gamma
            )
        else:
            reward = 0
        return (next_state, reward)

    return SampledDistribution(
        sampler=sampler_func,
        expectation_samples=1000
    )
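# Sketch: sample one (next_state, reward) transition of the market-making
# process above. `mm` is an assumed instance; the State and Action field
# names (t, S, W, I, Pb, Pa) follow the method body.
s0 = State(t=0., S=100., W=0., I=0)
a0 = Action(Pb=99.5, Pa=100.5)
next_s, r = mm.step(s0, a0).sample()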