class BaseStrategy6(bt.Strategy):
    """
    Base BTgym strategy, generation 6.

    Added for gen.6:
        - traded asset volatility-based rescaling for all broker statistics and, consequently, reward fn;
        - `self.p.norm_alpha` - tracking smoothing decay parameter added;
        - `self.p.target_call` - upper limit arg. is removed.

    TODO: auto sizer inference, co-integration coeff. inference

    Controls Environment inner dynamics and backtesting logic. Provides gym'my (State, Action, Reward, Done, Info)
    data. Any State, Reward and Info computation logic can be implemented by subclassing BTgymStrategy and
    overriding get_[mode]_state(), get_reward(), get_info(), get_done() and set_datalines() methods.
    One can always go deeper and override __init__() and next() methods for desired
    server cerebro engine behaviour, including order execution logic etc.

    Note:
        - base class supports single asset iteration via default data_line named 'base_asset',
          see derived classes for multi-asset support;
        - bt.observers.DrawDown observer will be automatically added to BTgymStrategy instance at runtime;
        - since it is bt.Strategy subclass, refer to https://www.backtrader.com/docu/strategy.html
          for more information;
        - several methods read the class-level `time_dim` / `avg_period` attributes rather than the
          same-named `params` entries, so subclasses changing one should keep the other in sync.
    """
    # Time embedding period:
    time_dim = 32  # NOTE: changed this --> change Policy UNREAL for aux. pix control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = int(time_dim / 2)

    # Possible agent actions; Note: place 'hold' first! :
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = ()
    num_features = len(features_parameters)

    params = dict(
        # Observation state shape is dictionary of Gym spaces,
        # at least should contain `raw_state` field.
        # By convention first dimension of every Gym Box space is time embedding one;
        # one can define any shape; should match env.observation_space.shape.
        # observation space state min/max values,
        # For `raw_state' (default) - absolute min/max values from BTgymDataset will be used.
        state_shape={
            'raw': spaces.Box(
                shape=(time_dim, 4),
                low=0,  # will get overridden.
                high=0,
                dtype=np.float32,
            ),
            'internal': spaces.Box(low=-100, high=100, shape=(avg_period, 1, 5), dtype=np.float32),
            'stat': spaces.Box(low=-100, high=100, shape=(2, 1), dtype=np.float32),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                    'trial_num': spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                    'trial_type': spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                    'sample_num': spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                    'first_row': spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                    'timestamp': spaces.Box(
                        shape=(),
                        low=0,
                        high=np.finfo(np.float64).max,
                        dtype=np.float64,
                    ),
                }
            )
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,  # fi_gamma, should match MDP gamma decay
        reward_scale=1.0,  # reward multiplicator
        norm_alpha=0.001,  # renormalisation tracking decay in (0, 1]
        drawdown_call=10,  # finish episode when hitting drawdown threshold, in percent to initial cash.
        dataset_stat=None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Got updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are tracked over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        """
        Sets up counters, broker settings, order stakes, statistics accumulators and action handling mode.

        Keyword Args:
            params (dict):          parameters dictionary, see Note below.

        Notes:
            Due to backtrader convention, any strategy arguments should be defined inside `params` dictionary
            or passed as kwargs to bt.Cerebro() class via .addstrategy() method. Parameter dictionary
            should contain at least these keys::

                state_shape:        Observation state shape is dictionary of Gym spaces, by convention
                                    first dimension of every Gym Box space is time embedding one;
                cash_name:          str, name for cash asset
                asset_names:        iterable of str, names for assets
                start_cash:         float, broker starting cash
                commission:         float, broker commission value, .01 stands for 1%
                leverage:           float, broker leverage
                slippage:           float, broker execution slippage
                order_size:         dict of fixed order stakes (floats); keys should match assets names;
                                    a single int/float is expanded to every data name.
                drawdown_call:      finish episode when hitting this drawdown threshold, in percent.
                portfolio_actions:  possible agent actions; `None` or an empty collection selects
                                    continuous (target percent) action handling.
                skip_frame:         number of environment steps to skip before returning next response,
                                    e.g. if set to 10 -- agent will interact with environment every 10th step;
                                    every other step agent action is assumed to be 'hold'.

            Default values are those of the class-level `params` dictionary.

        Raises:
            ValueError:             if order stake can not be inferred, or if continuous actions
                                    are requested with `skip_frame` <= 1.
            NotImplementedError:    if a required get_broker_[line]() or get_[mode]_state() method is missing.
        """
        # Inherit logger from cerebro:
        self.log = self.env._log

        assert self.p.avg_period + 2 < self.p.time_dim, 'Doh!'

        self.skip_frame = self.p.skip_frame

        self.iteration = 0
        self.pre_iteration = 0
        self.env_iteration = 0
        self.inner_embedding = 1
        self.is_done = False
        self.is_done_enabled = False
        self.steps_till_is_done = 2  # extra steps to make when episode terminal conditions are met
        self.action = self.p.initial_portfolio_action
        self.action_to_repeat = self.p.initial_portfolio_action
        self.action_repeated = 0
        self.num_action_repeats = None
        self.reward = 0
        self.order = None
        self.order_failed = 0
        self.broker_message = '_'
        self.final_message = '_'
        self.raw_state = None
        self.time_stamp = 0

        # Prepare broker:
        if self.p.start_cash is not None:
            self.env.broker.setcash(self.p.start_cash)

        if self.p.commission is not None:
            self.env.broker.setcommission(commission=self.p.commission, leverage=self.p.leverage)

        if self.p.slippage is not None:
            # Bid/ask workaround: set overkill 10% slippage + slip_out=False
            # ensuring we always buy at current 'high'~'ask' and sell at 'low'~'bid':
            self.env.broker.set_slippage_perc(self.p.slippage, slip_open=True, slip_match=True, slip_out=False)

        # Try to define stake, if no self.p.order_size dict has been set:
        if self.p.order_size is None:
            # If no order size has been set for every data_line,
            # try to infer stake size from sizer set by bt.Cerebro.addsizer() method:
            try:
                assert len(list(self.env.sizers.values())) == 1
                env_sizer_params = list(self.env.sizers.values())[0][-1]  # pull dict of outer set sizer params
                assert 'stake' in env_sizer_params.keys()

            except (AssertionError, KeyError) as e:
                msg = 'Order stake is not set neither via strategy.param.order_size nor via bt.Cerebro.addsizer method.'
                self.log.error(msg)
                raise ValueError(msg) from e

            self.p.order_size = {name: env_sizer_params['stake'] for name in self.p.asset_names}

        elif isinstance(self.p.order_size, int) or isinstance(self.p.order_size, float):
            # Single scalar stake: use it for every data feed.
            unimodal_stake = {name: self.p.order_size for name in self.getdatanames()}
            self.p.order_size = unimodal_stake

        # Current effective order sizes:
        self.current_order_sizes = None

        # Current stat normalisation:
        self.normalizer = 1.0

        self.trade_just_closed = False
        self.trade_result = 0

        self.unrealized_pnl = None
        self.norm_broker_value = None
        self.realized_pnl = None

        self.current_pos_duration = 0
        self.current_pos_min_value = 0
        self.current_pos_max_value = 0

        self.realized_broker_value = self.env.broker.startingcash
        self.episode_result = 0  # not used

        # Service sma to get correct first features values:
        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0], period=self.p.time_dim)
        self.data.dim_sma.plotinfo.plot = False

        # Episode-wide metadata:
        self.metadata = {
            'type': np.asarray(self.p.metadata['type']),
            'trial_num': np.asarray(self.p.metadata['parent_sample_num']),
            'trial_type': np.asarray(self.p.metadata['parent_sample_type']),
            'sample_num': np.asarray(self.p.metadata['sample_num']),
            'first_row': np.asarray(self.p.metadata['first_row']),
            'timestamp': np.asarray(self.time_stamp, dtype=np.float64)
        }
        self.state = {'raw': None, 'metadata': None}

        # If it is train or test episode?
        # default logic: true iff. it is test episode from target domain:
        self.is_test = self.metadata['type'] and self.metadata['trial_type']

        # This flag shows to the outer world if this episode can broadcast world-state information, e.g. move global
        # time forward (see: btgym.server._BTgymAnalyzer.next() method);
        self.can_broadcast = self.is_test

        self.log.debug('strategy.metadata: {}'.format(self.metadata))
        self.log.debug('is_test: {}'.format(self.is_test))

        # Broker data lines of interest (used for estimation inner state of agent):
        self.broker_datalines = [
            'cash',
            'value',
            'exposure',
            'drawdown',
            'pos_duration',
            'realized_pnl',
            'unrealized_pnl',
            'min_unrealized_pnl',
            'max_unrealized_pnl',
            'total_unrealized_pnl',
        ]
        # Define flat collection dictionary looking up for methods for estimating broker statistics,
        # one method for one mode, should be named .get_broker_[mode_name]():
        self.collection_get_broker_stat_methods = {}
        for line in self.broker_datalines:
            try:
                self.collection_get_broker_stat_methods[line] = getattr(self, 'get_broker_{}'.format(line))

            except AttributeError:
                raise NotImplementedError('Callable get_broker_{}.() not found'.format(line))

        # Broker and account related sliding statistics accumulators:
        self.broker_stat = {key: np.zeros(self.avg_period) for key in self.broker_datalines}

        # This data line will be used by default to
        # define normalisation bounds (can be overridden via .set_datalines()):
        self.stat_asset = self.data.open

        # Add custom data Lines if any [and possibly redefine stat_asset and order_size_normalizer]:
        self.set_datalines()

        # Normalisation statistics estimator (updated via update_broker_stat.()):
        self.norm_stat_tracker = Zscore(1, alpha=self.p.norm_alpha)
        self.normalisation_state = NormalisationState(0, 0, .9, 1.1)

        # State exp. smoothing params:
        self.internal_state_discount = np.cumprod(
            np.tile(1 - 1 / self.p.avg_period, self.p.avg_period)
        )[::-1]
        self.external_state_discount = None  # not used

        # Define flat collection dictionary looking for methods for estimating observation state,
        # one method per one mode, should be named .get_[mode_name]_state():
        self.collection_get_state_methods = {}
        for key in self.p.state_shape.keys():
            try:
                self.collection_get_state_methods[key] = getattr(self, 'get_{}_state'.format(key))

            except AttributeError:
                raise NotImplementedError('Callable get_{}_state.() not found'.format(key))

        for data in self.datas:
            self.log.debug('data_name: {}'.format(data._name))

        self.log.debug('stake size: {}'.format(self.p.order_size))

        # Define how this strategy should handle actions: either as discrete or continuous.
        # Note: a set never compares equal to `{}` (an empty dict), so emptiness is tested explicitly.
        if self.p.portfolio_actions is None or len(self.p.portfolio_actions) == 0:
            # No discrete actions provided, assume continuous:
            try:
                assert self.p.skip_frame > 1

            except AssertionError:
                msg = 'For continuous actions it is essential to set `skip_frame` parameter > 1, got: {}'.format(
                    self.p.skip_frame
                )
                self.log.error(msg)
                raise ValueError(msg)

            # Disable broker checking margin,
            # see: https://community.backtrader.com/topic/152/multi-asset-ranking-and-rebalancing/2?page=1
            self.env.broker.set_checksubmit(False)
            self.next_process_fn = self._next_target_percent
            # Repeat action 2 times:
            self.num_action_repeats = 2

        else:
            # Use discrete handling method otherwise:
            self.env.broker.set_checksubmit(True)
            self.next_process_fn = self._next_discrete
            # Do not repeat action for discrete:
            self.num_action_repeats = 0

    def prenext(self):
        """
        Warm-up step: resets normalisation tracker once and then starts filling broker statistics
        so that accumulators hold `avg_period` values by the time the first next() call is made.
        """
        if self.pre_iteration + 2 > self.p.time_dim - self.avg_period:
            self.update_broker_stat()

        elif self.pre_iteration + 2 == self.p.time_dim - self.avg_period:
            _ = self.norm_stat_tracker.reset(
                np.asarray(self.stat_asset.get(size=self.data.close.buflen()))[None, :]
            )

        self.pre_iteration += 1

    def nextstart(self):
        """
        Stores `self.data.close.buflen()` as `inner_embedding`; the value is used by `_get_done()`
        to estimate when the end of data is approaching.
        """
        self.inner_embedding = self.data.close.buflen()

    def next(self):
        """
        Default implementation for built-in backtrader method.
        Defines one step environment routine;
        Handles order execution logic according to action received.
        Note that orders can only be submitted for data_lines in action_space (assets).
        `self.action` attr. is updated by btgym.server._BTgymAnalyzer; actions holding `_skip_this` key
        are emitted while doing `skip_frame` loop, in which case the last real action is repeated
        up to `num_action_repeats` times.
        """
        self.update_broker_stat()

        if '_skip_this' in self.action.keys():
            # Skipped frame: optionally repeat last processed action.
            if self.action_repeated < self.num_action_repeats:
                self.next_process_fn(self.action_to_repeat)
                self.action_repeated += 1

        else:
            # New action received: process it and remember for possible repeats.
            self.next_process_fn(self.action)
            self.action_repeated = 0
            self.action_to_repeat = self.action

    def notify_trade(self, trade):
        """
        Accumulates result of just closed trade and stores realized portfolio value.

        Args:
            trade:  trade notification object; its `isclosed` and `pnlcomm` attributes are read.
        """
        if trade.isclosed:
            # Set trade flags: True if trade have been closed just now and within last frame-skip period,
            # and store trade result:
            self.trade_just_closed = True
            # Note: `trade_just_closed` flag has to be reset manually after evaluating.
            self.trade_result += trade.pnlcomm

            # Store realized portfolio value:
            self.realized_broker_value = self.broker.get_value()

    def update_broker_stat(self):
        """
        Updates all sliding broker statistics with latest-step values such as:
            - normalized broker value
            - normalized broker cash
            - normalized exposure (position size)
            - exp. scaled episode duration in steps, normalized wrt. max possible episode steps
            - normalized realized profit/loss for last closed trade
                (is zero if no pos. closures within last env. step)
            - normalized profit/loss for current opened trade (unrealized p/l);
        """
        # Update current account value:
        current_value = self.env.broker.get_value()

        # ...normalisation bounds:
        norm_state = self.get_normalisation()

        # ...individual positions for each instrument traded:
        positions = [self.env.broker.getposition(data) for data in self.datas]

        # ... total exposure as sum of absolute position sizes:
        exposure = sum([abs(pos.size) for pos in positions])

        # ... tracking normalisation constant (inverse width of normalisation interval):
        self.normalizer = 1 / np.clip(
            (norm_state.up_interval - norm_state.low_interval),
            1e-8,
            None
        )

        for key, method in self.collection_get_broker_stat_methods.items():
            update = method(
                current_value=current_value,
                positions=positions,
                exposure=exposure,
                lower_bound=norm_state.low_interval,
                upper_bound=norm_state.up_interval,
                normalizer=self.normalizer,
            )
            # Update accumulator: drop oldest value, append newest one:
            self.broker_stat[key] = np.concatenate(
                [self.broker_stat[key][1:], np.asarray([float(update)])]
            )

        # Reset one-time flags (only after every statistic has been estimated):
        self.trade_just_closed = False
        self.trade_result = 0

    def get_normalisation(self):
        """
        Estimates current normalisation constants, updates `normalisation_state` attr.

        Returns:
            instance of NormalisationState tuple
        """
        # Update normalizer stat:
        stat_data = np.asarray(self.stat_asset.get(size=1))
        mean, var = self.norm_stat_tracker.update(stat_data[None, :])
        var = np.clip(var, 1e-8, None)

        # Use 99% N(stat_data_mean, stat_data_std) intervals as normalisation interval:
        intervals = stats.norm.interval(.99, mean, var**.5)
        self.normalisation_state = NormalisationState(
            mean=float(mean),
            variance=float(var),
            low_interval=intervals[0][0],
            up_interval=intervals[1][0]
        )
        return self.normalisation_state

    def get_order_sizes(self):
        """
        Estimates current order sizes for assets in trade, sets attribute.

        Returns:
            array-like of floats
        """
        # Default implementation for fixed-size orders.
        # Builtin `float` is used: `np.float` alias has been removed from NumPy.
        self.current_order_sizes = np.fromiter(self.p.order_size.values(), dtype=float)
        return self.current_order_sizes

    def get_broker_value(self, current_value, normalizer, **kwargs):
        """
        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant (accepted but currently not applied)

        Returns:
            broker value change w.r.t. start value, scaled by starting cash and leverage.
        """
        return (current_value - self.env.broker.startingcash) / self.env.broker.startingcash / self.p.leverage

    def get_broker_cash(self, current_value, **kwargs):
        """
        Args:
            current_value:  float, current portfolio value

        Returns:
            broker cash normalized w.r.t. current value.
        """
        return self.env.broker.get_cash() / current_value

    def get_broker_exposure(self, exposure, normalizer, **kwargs):
        """
        Args:
            exposure:   float, current total position exposure
            normalizer: float, normalisation constant

        Returns:
            exposure (position size) scaled by normalisation constant.
        """
        return exposure * normalizer

    def get_broker_realized_pnl(self, normalizer, **kwargs):
        """
        Args:
            normalizer:     float, normalisation constant

        Returns:
            normalized realized profit/loss for last closed trade (is zero if no pos. closures within last env. step)
        """
        if self.trade_just_closed:
            pnl = self.trade_result * normalizer

        else:
            pnl = 0.0
        return pnl

    def get_broker_unrealized_pnl(self, current_value, normalizer, **kwargs):
        """
        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            normalized profit/loss for current opened trade
        """
        pnl = (current_value - self.realized_broker_value) * normalizer
        return pnl

    def get_broker_total_unrealized_pnl(self, current_value, normalizer, **kwargs):
        """
        REDUNDANT

        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant (accepted but not applied)

        Returns:
            profit/loss wrt. initial portfolio value, multiplied by starting cash
        """
        # NOTE(review): result is multiplied by starting cash rather than normalised by it
        # (or by `normalizer`) -- looks unintended, confirm before relying on this statistic.
        pnl = (current_value - self.env.broker.startingcash) * self.env.broker.startingcash
        return pnl

    def get_broker_drawdown(self, **kwargs):
        """
        Returns:
            current drawdown value as fraction of `drawdown_call` parameter;
            zero if drawdown observer has no values yet.
        """
        try:
            dd = self.stats.drawdown.drawdown[-1] / self.p.drawdown_call

        except IndexError:
            dd = 0.0
        return dd

    def get_broker_pos_duration(self, exposure, **kwargs):
        """
        Args:
            exposure:   float, current total positions exposure

        Returns:
            int, number of ticks current position is being held
        """
        if exposure == 0:
            self.current_pos_duration = 0

        else:
            self.current_pos_duration += 1

        return self.current_pos_duration

    def get_broker_max_unrealized_pnl(self, current_value, exposure, normalizer, **kwargs):
        """
        Args:
            exposure:       float, current total positions exposure
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            best unrealised PnL achieved within current opened position
        """
        if exposure == 0:
            # No position: restart tracking from current value.
            self.current_pos_max_value = current_value

        else:
            if self.current_pos_max_value < current_value:
                self.current_pos_max_value = current_value

        pnl = (self.current_pos_max_value - self.realized_broker_value) * normalizer
        return pnl

    def get_broker_min_unrealized_pnl(self, current_value, exposure, normalizer, **kwargs):
        """
        Args:
            exposure:       float, current total positions exposure
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            worst unrealised PnL achieved within current opened position
        """
        if exposure == 0:
            # No position: restart tracking from current value.
            self.current_pos_min_value = current_value

        else:
            if self.current_pos_min_value > current_value:
                self.current_pos_min_value = current_value

        pnl = (self.current_pos_min_value - self.realized_broker_value) * normalizer
        return pnl

    def set_datalines(self):
        """
        Default datalines are: Open, Low, High, Close, Volume.
        Any other custom data lines, indicators, etc. should be explicitly defined by overriding this method.
        Invoked once by Strategy.__init__().
        """
        pass

    def get_raw_state(self):
        """
        Default state observation composer.

        Returns:
             and updates time-embedded environment state observation as [n, 4] numpy matrix, where:
                4 - number of signal features (open, high, low, close),
                n - time-embedding length, taken from class attribute `time_dim`.

        Note:
            `self.raw_state` is used to render environment `human` mode and should not be modified.
        """
        # `np.vstack` is the function `np.row_stack` used to alias.
        self.raw_state = np.vstack(
            (
                np.frombuffer(self.data.open.get(size=self.time_dim)),
                np.frombuffer(self.data.high.get(size=self.time_dim)),
                np.frombuffer(self.data.low.get(size=self.time_dim)),
                np.frombuffer(self.data.close.get(size=self.time_dim)),
            )
        ).T

        return self.raw_state

    def get_stat_state(self):
        """
        Returns:
            current state of normalisation statistics tracker as numpy array.
        """
        return np.asarray(self.norm_stat_tracker.get_state())

    def get_internal_state(self):
        """
        Composes broker-related part of observation from discounted sliding statistics.

        Returns:
            array with broker statistics stacked along last axis and singleton axis inserted
            in the middle, clipped to [-100, 100].
        """
        stat_lines = ('value', 'unrealized_pnl', 'realized_pnl', 'cash', 'exposure')
        # Use smoothed values:
        x_broker = np.stack(
            [np.asarray(self.broker_stat[name]) * self.internal_state_discount for name in stat_lines],
            axis=-1
        )
        return np.clip(x_broker[:, None, :], -100, 100)

    def get_metadata_state(self):
        """
        Returns:
            episode metadata dictionary with `timestamp` field refreshed.
        """
        self.metadata['timestamp'] = np.asarray(self._get_timestamp())

        return self.metadata

    def _get_time(self):
        """
        Retrieves current time point of the episode data.

        Returns:
            datetime object
        """
        return self.data.datetime.datetime()

    def _get_timestamp(self):
        """
        Sets attr. and returns current data timestamp.

        Returns:
            POSIX timestamp
        """
        self.time_stamp = self._get_time().timestamp()

        return self.time_stamp

    def _get_broadcast_info(self):
        """
        Transmits broadcasting message.

        Returns:
            dictionary or None
        """
        try:
            return self.get_broadcast_message()

        except AttributeError:
            return None

    def get_broadcast_message(self):
        """
        Override this.

        Returns:
            dictionary or None
        """
        return None

    def get_state(self):
        """
        Collects estimated values for every mode of observation space by calling methods from
        `collection_get_state_methods` dictionary.
        As a rule, this method should not be modified, override or implement corresponding get_[mode]_state()
        methods, defining necessary calculations and return properly shaped tensors for every space mode.

        Note:
            - 'data' refers to bt.strategy datafeeds and should be treated as such.
                Datafeed Lines that are not default to BTgymStrategy should be explicitly defined by
                 __init__() or set_datalines().
        """
        # Inner state statistics are updated by .next(), not here.
        self.state = {key: method() for key, method in self.collection_get_state_methods.items()}
        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);
        Potential FI_1 is current normalized unrealized profit/loss.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf

        Returns:
            reward value, scaled by `reward_scale` and clipped to [-1e3, 1e3]; also stored as `self.reward`.
        """
        # All sliding statistics for this step are already updated by .next() -> update_broker_stat().

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.broker_stat['unrealized_pnl'])
        current_pos_duration = int(self.broker_stat['pos_duration'][-1])

        # We want to estimate potential `fi = gamma*fi_prime - fi` of current opened position,
        # thus need to consider different cases given skip_frame parameter:
        if current_pos_duration == 0:
            # Set potential term to zero if there is no opened positions:
            f1 = 0

        else:
            if current_pos_duration < self.p.skip_frame:
                # Position is younger than one skip-frame period: no previous potential.
                fi_1 = 0
                fi_1_prime = np.average(unrealised_pnl[-current_pos_duration:])

            elif current_pos_duration < 2 * self.p.skip_frame:
                fi_1 = np.average(
                    unrealised_pnl[-(self.p.skip_frame + current_pos_duration):-self.p.skip_frame]
                )
                fi_1_prime = np.average(unrealised_pnl[-self.p.skip_frame:])

            else:
                fi_1 = np.average(
                    unrealised_pnl[-2 * self.p.skip_frame:-self.p.skip_frame]
                )
                fi_1_prime = np.average(unrealised_pnl[-self.p.skip_frame:])

            # Potential term (only defined while a position is open):
            f1 = self.p.gamma * fi_1_prime - fi_1

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(self.broker_stat['realized_pnl'])[-self.p.skip_frame:].sum()

        # Weights are subject to tune:
        self.reward = (0.1 * f1 + 1.0 * realized_pnl) * self.p.reward_scale
        self.reward = np.clip(self.reward, -1e3, 1e3)

        return self.reward

    def get_info(self):
        """
        Composes information part of environment response,
        can be any object. Override to own taste.

        Note:
            Due to 'skip_frame' feature, INFO part of environment response transmitted by server can be  a list
            containing either all skipped frame's info objects, i.e. [info[-9], info[-8], ..., info[0]] or
            just latest one, [info[0]]. This behaviour is set inside btgym.server._BTgymAnalyzer().next() method.
        """
        return dict(
            step=self.iteration,
            time=self.data.datetime.datetime(),
            action=self.action,
            broker_message=self.broker_message,
            broker_cash=self.stats.broker.cash[0],
            broker_value=self.stats.broker.value[0],
            drawdown=self.stats.drawdown.drawdown[0],
            max_drawdown=self.stats.drawdown.maxdrawdown[0],
        )

    def get_done(self):
        """
        Episode termination estimator,
        defines any trading logic conditions episode stop is called upon, e.g. <OMG! Stop it, we became too rich!>.
        It is just a structural convention method. Default method is empty.

        Expected to return:
            tuple (<is_done, type=bool>, <message, type=str>).
        """
        return False, '-'

    def _get_done(self):
        """
        Default episode termination method,
        checks base conditions episode stop is called upon:
            1. Reached maximum episode duration. Need to check it explicitly, because <self.is_done> flag
               is sent as part of environment response.
            2. Got '_done' signal from outside. E.g. via env.reset() method invoked by outer RL algorithm.
            3. Hit `drawdown` threshold.

        This method shouldn't be overridden or called explicitly.

        Runtime execution logic is:
            terminate episode if:
                get_done() returned (True, 'something')
                OR
                ANY _get_done() default condition is met.

        Returns:
            bool, value of `self.is_done` flag.
        """
        if not self.is_done_enabled:
            # Episode is on its way,
            # apply base episode termination rules:
            is_done_rules = [
                # Do we approaching the end of the episode?:
                (
                    self.iteration >=
                    self.data.numrecords - self.inner_embedding - self.p.skip_frame - self.steps_till_is_done,
                    'END OF DATA'
                ),
                # Any money left?:
                (self.stats.drawdown.maxdrawdown[0] >= self.p.drawdown_call, 'DRAWDOWN CALL'),
            ]
            # Append custom get_done() results, if any:
            is_done_rules += [self.get_done()]

            # Sweep through rules:
            for (condition, message) in is_done_rules:
                if condition:
                    # Start episode termination countdown for clean exit:
                    # to forcefully execute final `close` order and compute proper reward
                    # we need to make `steps_till_is_done` number of steps until `is_done` flag can be safely risen:
                    self.is_done_enabled = True
                    self.broker_message += message
                    self.final_message = message
                    self.order = self.close()
                    self.log.debug(
                        'Episode countdown started at: {}, {}, r:{}'.format(self.iteration, message, self.reward)
                    )

        else:
            # Now in episode termination phase,
            # just keep hitting `Close` button:
            self.steps_till_is_done -= 1
            self.broker_message = 'CLOSE, {}'.format(self.final_message)
            self.order = self.close()
            self.log.debug(
                'Episode countdown contd. at: {}, {}, r:{}'.format(self.iteration, self.broker_message, self.reward)
            )

            if self.steps_till_is_done <= 0:
                # Now we've done, terminate:
                self.is_done = True

        return self.is_done

    def notify_order(self, order):
        """
        Shamelessly taken from backtrader tutorial.
        Updates `broker_message` according to order status and clears pending order reference.
        TODO: better multi data support

        Args:
            order:  order notification object.
        """
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.broker_message = 'BUY executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'.format(
                    order.executed.price,
                    order.executed.value,
                    order.executed.comm
                )
                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm

            else:  # Sell
                self.broker_message = 'SELL executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'.format(
                    order.executed.price,
                    order.executed.value,
                    order.executed.comm
                )
            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.broker_message = 'ORDER FAILED with status: ' + str(order.getstatusname())
            # Rise order_failed flag until get_reward() will [hopefully] use and reset it:
            self.order_failed += 1

        self.order = None

    def _next_discrete(self, action):
        """
        Default implementation for discrete actions.
        Note that orders can be submitted only for data_lines in action_space (assets).

        Args:
            action:     dict, string encoding of btgym.spaces.ActionDictSpace
        """
        for key, single_action in action.items():
            # Simple action-to-order logic:
            if single_action == 'hold' or self.is_done_enabled:
                pass

            elif single_action == 'buy':
                self.order = self.buy(data=key, size=self.p.order_size[key])
                self.broker_message = 'new {}_BUY created; '.format(key) + self.broker_message

            elif single_action == 'sell':
                self.order = self.sell(data=key, size=self.p.order_size[key])
                self.broker_message = 'new {}_SELL created; '.format(key) + self.broker_message

            elif single_action == 'close':
                self.order = self.close(data=key)
                self.broker_message = 'new {}_CLOSE created; '.format(key) + self.broker_message

        # Somewhere after this point, server-side _BTgymAnalyzer() is exchanging information with environment wrapper,
        # obtaining <self.action> , composing and sending <state,reward,done,info> etc... never mind.

    def _next_target_percent(self, action):
        """
        Uses `order_target_percent` method to rebalance assets to given ratios. Expects action for every asset to be
        a float scalar in [0,1], with actions sum to 1 over all assets (including base one).
        Note that action for base asset (cash) is ignored.
        For details refer to: https://www.backtrader.com/docu/order_target/order_target.html

        Args:
            action:     dict-like, maps asset names to target portfolio ratios.
        """
        # TODO 1: filter similar actions to prevent excessive orders issue e.g by DKL on two consecutive ones
        # TODO 2: actions discretisation on level of execution
        for asset in self.p.asset_names:
            # Reducing assets positions by factor 0.9, i.e. subj. to 10% margin reserve:
            single_action = round(float(action[asset]) * 0.9, 2)
            self.order = self.order_target_percent(data=asset, target=single_action)
            self.broker_message += ' new {}->{:1.0f}% created; '.format(asset, single_action * 100)
class PriceModel(TimeSeriesModel):
    """
    Time-series model wrapper for positive-valued data.

    Observations are log-transformed and z-scored with tracked statistics before being passed
    to the base model; trajectories coming back from the base model are de-normalised and exponentiated.
    """

    def __init__(
            self,
            max_length,
            analyzer_window,
            analyzer_grouping=None,
            alpha=None,
            filter_alpha=None,
            stat_alpha=None,
    ):
        """

        Args:
            max_length:         uint, maximum trajectory length to keep;
            analyzer_window:    uint, SSA embedding window;
            analyzer_grouping:  SSA decomposition triples grouping,
                                iterable of pairs convertible to python slices, i.e.:
                                grouping=[[0,1], [1,2], [2, None]];
            alpha:              float in [0, 1], SSA and process estimator decaying factor;
            filter_alpha:       float in [0, 1], process smoothing decaying factor;
            stat_alpha:         float in [0, 1], time-series statistics tracking decaying factor;
        """
        super().__init__(
            max_length,
            analyzer_window,
            analyzer_grouping,
            alpha,
            filter_alpha,
        )

        # Tracker of mean and variance; it is fed with log-transformed data, see reset() and update():
        self.stat = Zscore(1, stat_alpha)

    def get_state(self):
        """
        Collects current process, analyzer and statistics states.

        Returns:
            current state as instance of PriceModelState
        """
        process_state = self.process.get_state()
        analyzer_state = self.analyzer.get_state()
        stat_state = self.stat.get_state()
        return PriceModelState(
            process=process_state,
            analyzer=analyzer_state,
            stat=stat_state,
        )

    @staticmethod
    def normalise(trajectory, mean, variance):
        """
        Z-scores data; variance is floored at 1e-8 to keep division safe.
        """
        centered = trajectory - mean
        floored_variance = np.clip(variance, 1e-8, None)
        return centered / floored_variance ** .5

    @staticmethod
    def denormalize(trajectory, mean, variance):
        """
        Rescales z-scored data back with given mean and variance (no variance flooring here).
        """
        scale = variance ** .5
        return trajectory * scale + mean

    def reset(self, init_trajectory):
        """
        Re-initialises statistics and base model from initial observations.

        Args:
            init_trajectory:    initial time-series observations of size from [1] to [num_points]
        """
        log_trajectory = np.log(init_trajectory)
        mean, variance = self.stat.reset(log_trajectory[None, :])
        normalised = self.normalise(log_trajectory, mean, variance)
        return super().reset(normalised)

    def update(self, trajectory, disjoint=False):
        """
        Feeds new observations to statistics tracker and base model.

        Args:
            trajectory: time-series update observations of size from [1] to [num_points], where:
                        num_points <= max_length to keep model trajectory continuous
            disjoint:   bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        log_trajectory = np.log(trajectory)
        mean, variance = self.stat.update(log_trajectory[None, :])
        normalised = self.normalise(log_trajectory, mean, variance)
        return super().update(normalised, disjoint)

    def transform(self, trajectory=None, state=None, size=None):
        """
        Returns analyzer data decomposition.

        Args:
            trajectory:     data to decompose of size [num_points] or None
            state:          instance of PriceModelState or None; required whenever `trajectory` is given
            size:           uint, size of decomposition to get, or None

        Returns:
            whatever base class transform() returns for the (normalised) trajectory and unpacked state;
            when `trajectory` is None, None is forwarded to the base class in its place.
        """
        base_state = None
        if state is not None:
            assert isinstance(state, PriceModelState), \
                'Expected `state` as instance of PriceModelState, got: {}'.format(type(state))
            # Base class knows nothing about the `stat` field, repack without it:
            base_state = TimeSeriesModelState(analyzer=state.analyzer, process=state.process)

        if trajectory is not None:
            # Raw signal has to be log-normalised with statistics stored in given state:
            assert state is not None, 'State is expected when trajectory is given'
            log_trajectory = np.log(trajectory)
            trajectory = self.normalise(log_trajectory, state.stat.mean, state.stat.variance)

        return super().transform(trajectory, base_state, size)

    def get_trajectory(self, size=None):
        """
        Returns stored fragment of original time-series data.

        Args:
            size:   uint, fragment length in [1, ..., max_length] or None

        Returns:
            stored trajectory fragment mapped back to original scale;
            `size` is forwarded to the base class unchanged.
        """
        # TODO: reconstruction is freaky due to only last stored statistic is used
        stored = super().get_trajectory(size)
        stat = self.get_state().stat
        restored_log = self.denormalize(stored, stat.mean, stat.variance)
        return np.exp(restored_log)

    def generate(self, batch_size, size, state=None, driver_df=None):
        """
        Generates batch of realisations given process parameters.

        Args:
            batch_size:     uint, number of realisations to draw
            size:           uint, length of each one
            state:          instance PriceModelState or None, model parameters to use;
                            current model state is used if None
            driver_df:      t-student process driver degree of freedom parameter or None

        Returns:
            process realisations batch mapped back to original scale
        """
        if state is None:
            # Own statistics are used for de-normalisation, base class falls back to its stored state:
            state = self.get_state()
            base_state = None
        else:
            assert isinstance(state, PriceModelState), \
                'Expected `state` as instance of PriceModelState, got: {}'.format(type(state))
            base_state = TimeSeriesModelState(analyzer=state.analyzer, process=state.process)

        generated = super().generate(batch_size, size, base_state, driver_df)
        restored_log = self.denormalize(generated, state.stat.mean, state.stat.variance)
        return np.exp(restored_log)

    @staticmethod
    def get_random_state(p_params, mean=(100, 100), variance=(1, 1)):
        """
        Samples random uniform model state w.r.t. intervals given.

        Args:
            p_params:       dict, stochastic process parameters, see kwargs at: OUProcess.get_random_state
            mean:           iterable of floats as [0 < lower_bound, upper_bound], time-series means sampling interval.
            variance:       iterable of floats as [0 < lower_bound, upper_bound], time-series variances sampling interval.

        Returns:
            instance of PriceModelState with `analyzer` set to None

        Note:
            lower bounds below 1e-8 are rejected for both mean and variance;
            sampled mean and variance are converted to statistics of log-transformed data.
        """
        intervals = (
            ('mean', mean, 1e-8),
            ('variance', variance, 1e-8),
        )
        sample = {}
        for name, param, low_threshold in intervals:
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] >= low_threshold, \
                'Expected param `{}` lower bound be no less than {}, got: {}'.format(name, low_threshold, interval[0])
            sample[name] = np.random.uniform(low=interval[0], high=interval[-1], size=1)

        sampled_mean = sample['mean']
        sampled_variance = sample['variance']

        # Moments of the log-transformed variable (biased estimates but ok for rnd. samples).
        # Inverse transform memo:
        #   mean = exp(log_mean + 0.5 * log_var)
        #   var = mean**2 * (exp(log_var) - 1)
        log_variance = np.log(sampled_variance / sampled_mean ** 2 + 1)
        log_mean = np.log(sampled_mean) - .5 * log_variance

        process_state = OUProcess.get_random_state(**p_params)
        stat_state = ZscoreState(mean=log_mean, variance=log_variance)
        return PriceModelState(
            process=process_state,
            analyzer=None,
            stat=stat_state,
        )
class BivariateTSModel:
    """
    Two-factor bivariate time-series model.

    A pair of time-series [X1, X2] is z-scored and projected onto two components:
    P (half-sum) and S (half-difference), each tracked by its own TimeSeriesModel.

    Motivating papers:
        Eduardo Schwartz, James E. Smith, "Short-Term Variations and Long-Term Dynamics in Commodity Prices",
        in "Management Science", Vol. 46, No. 7, July 2000 pp. 893–911

        Harris, D., "Principal components analysis of cointegrated time series,"
        in "Econometric Theory", Vol. 13, 1997
    """
    # TODO: trajectory generator uses simplified algorithm: entire trajectory is generated out of single model state
    # TODO: proper state-space model approach
    # TODO: should be: sample [randomized?] trajectory of states -> sample realisation trajectory of same length

    # Decomposition matrix:
    u_decomp = np.asarray([[.5, .5], [.5, -.5]])

    # Reconstruction (inverse u_decomp):
    u_recon = np.asarray([[1., 1.], [1., -1.]])

    def __init__(
            self,
            max_length,
            analyzer_window,
            p_analyzer_grouping=None,
            s_analyzer_grouping=None,
            alpha=None,
            filter_alpha=None,
            stat_alpha=None,
            ps_alpha=None,
    ):
        """

        Scalar-or-pair arguments (`max_length`, `analyzer_window`, `alpha`, `filter_alpha`): first element
        is used for P model and last one for S model, so a scalar is shared by both.

        Args:
            max_length:             uint, maximum time-series trajectory length to keep;
            analyzer_window:        uint, SSA embedding window (shared for P and S analyzers);
            p_analyzer_grouping:    P process SSA decomposition triples grouping,
                                    iterable of pairs convertible to python slices, i.e.:
                                    grouping=[[0,1], [1,2], [2, None]];
            s_analyzer_grouping:    S process SSA decomposition triples grouping, see above;
            alpha:                  float in [0, 1], SSA and processes estimators decaying factor;
            filter_alpha:           float in [0, 1], processes smoothing decaying factor;
            stat_alpha:             float in [0, 1], time-series statistics tracking decaying factor;
            ps_alpha:               float in [0, 1], P|S processes covariance tracking decaying factor;
        """
        max_length = np.atleast_1d(max_length)
        analyzer_window = np.atleast_1d(analyzer_window)
        alpha = np.atleast_1d(alpha)
        filter_alpha = np.atleast_1d(filter_alpha)

        # Max. variance factor component (average):
        self.p = TimeSeriesModel(
            max_length[0],
            analyzer_window[0],
            p_analyzer_grouping,
            alpha[0],
            filter_alpha[0]
        )

        # Max. stationarity factor component (difference):
        self.s = TimeSeriesModel(
            max_length[-1],
            analyzer_window[-1],
            s_analyzer_grouping,
            alpha[-1],
            filter_alpha[-1]
        )

        # Statistics of original data:
        self.stat = Zscore(2, stat_alpha)

        # Stochastic processes covariance:
        self.ps_stat = Covariance(2, ps_alpha)

    def ready(self):
        """
        Combines readiness of S and P component models (S is queried first).
        """
        return self.s.ready() and self.p.ready()

    def get_state(self):
        """
        Returns model state tuple.

        Returns:
            current state as instance of BivariateTSModelState
        """
        return BivariateTSModelState(
            p=self.p.get_state(),
            s=self.s.get_state(),
            stat=self.stat.get_state(),
            ps_stat=self.ps_stat.get_state()
        )

    @staticmethod
    def get_random_state(p_params, s_params, mean=(100, 100), variance=(1, 1), ps_corrcoef=(-1, 1)):
        """
        Samples random uniform model state w.r.t. parameters intervals given.

        Args:
            p_params:       dict, P stochastic process parameters, see kwargs at: OUProcess.get_random_state
            s_params:       dict, S stochastic process parameters, see kwargs at: OUProcess.get_random_state
            mean:           iterable of floats as [lower_bound, upper_bound], time-series means sampling interval.
            variance:       iterable of floats as [lower_bound, upper_bound], time-series variances sampling interval.
            ps_corrcoef:    iterable of floats as [lower_bound, upper_bound], correlation coefficient
                            for P and S process innovations, -1 <= ps_corrcoef <= 1

        Returns:
            instance of BivariateTSModelState

        Raises:
            AssertionError: if any interval is not an ordered 1d pair or violates its bounds.

        Note:
            negative means are allowed.
        """
        sample = dict()
        for name, param, low_threshold, high_threshold in zip(
                ['mean', 'variance', 'ps_corrcoef'],
                [mean, variance, ps_corrcoef],
                [-np.inf, 1e-8, -1.0],
                [np.inf, np.inf, 1.0],
        ):
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] >= low_threshold, \
                'Expected param `{}` lower bound be no less than {}, got: {}'.format(name, low_threshold, interval[0])

            # Upper bound matters for correlation coefficient: values above 1 give invalid correlation matrix.
            assert interval[-1] <= high_threshold, \
                'Expected param `{}` upper bound be no greater than {}, got: {}'.format(
                    name, high_threshold, interval[-1]
                )

            # Two values are drawn per parameter (one per series); only first one is used for `ps_corrcoef`:
            sample[name] = np.random.uniform(low=interval[0], high=interval[-1], size=2)

        # Correlation matrix instead of covariance - it is ok as it gets normalized when sampling anyway:
        rho = np.eye(2)
        rho[0, 1] = rho[1, 0] = sample['ps_corrcoef'][0]

        return BivariateTSModelState(
            p=TimeSeriesModel.get_random_state(**p_params),
            s=TimeSeriesModel.get_random_state(**s_params),
            stat=ZscoreState(
                mean=sample['mean'],
                variance=sample['variance']
            ),
            ps_stat=CovarianceState(
                mean=np.zeros(2),
                variance=np.ones(2),
                covariance=rho,
            ),
        )

    @staticmethod
    def _decompose(trajectory, mean, variance, u):
        """
        Returns orthonormal decomposition of pair [X1, X2].
        Static method, can be used as stand-alone function.

        Args:
            trajectory:     time-series data of shape [2, num_points]
            mean:           data mean of size [2]
            variance:       data variance of size [2]
            u:              [2, 2] decomposition matrix

        Returns:
            data projection of size [2, num_points], where first (P) component is average and second (S) is difference
            of original time-series.
        """
        assert len(trajectory.shape) == 2 and trajectory.shape[0] == 2, \
            'Expected data as array of size [2, num_points], got: {}'.format(trajectory.shape)

        assert mean.shape == (2,) and variance.shape == (2,), \
            'Expected mean and variance as vectors of size [2], got: {}, {}'.format(mean.shape, variance.shape)

        assert u.shape == (2, 2), 'Expected U as 2x2 matrix, got: {}'.format(u.shape)

        # Z-score data (variance is floored at 1e-8 to keep division safe):
        norm_data = (trajectory - mean[:, None]) / np.clip(variance[:, None], 1e-8, None)**.5
        ps_decomposition = np.matmul(u, norm_data)
        return ps_decomposition

    @staticmethod
    def _reconstruct(ps_decomposition, mean, variance, u):
        """
        Returns original data [X1, X2] given orthonormal P|S decomposition.
        Static method, can be used as stand-alone function.

        Args:
            ps_decomposition:   data ps-decomposition of size [2, num_points]
            mean:               original data mean of size [2]
            variance:           original data variance of size [2]
            u:                  [2, 2] reconstruction matrix

        Returns:
            reconstructed data of size [2, num_points]
        """
        assert len(ps_decomposition.shape) == 2 and ps_decomposition.shape[0] == 2, \
            'Expected data as array of size [2, num_points], got: {}'.format(ps_decomposition.shape)

        assert mean.shape == (2,) and variance.shape == (2,), \
            'Expected mean and variance as vectors of size [2], got: {}, {}'.format(mean.shape, variance.shape)

        assert u.shape == (2, 2), 'Expected U as 2x2 matrix, got: {}'.format(u.shape)

        return np.matmul(u, ps_decomposition) * variance[:, None]**.5 + mean[:, None]

    def decompose(self, trajectory):
        """
        Returns orthonormal decomposition of pair [X1, X2] w.r.t current statistics.

        Args:
            trajectory:     time-series data of shape [2, num_points]

        Returns:
            tuple (P, S), where first (P) component is average and second (S) is difference
            of original time-series, of size [num_points] each
        """
        ps_decomp = self._decompose(trajectory, self.stat.mean, self.stat.variance, self.u_decomp)
        return ps_decomp[0, :], ps_decomp[1, :]

    def reconstruct(self, p, s, mean=None, variance=None):
        """
        Returns original data [X1, X2] given orthonormal P|S decomposition.

        Args:
            p:          data p-component of shape [num_points]
            s:          data s-component of shape [num_points]
            mean:       original data mean of size [2] or None
            variance:   original data variance of size [2] or None

        Returns:
            reconstructed data of size [2, num_points]

        Notes:
            if either mean or variance arg is not given - stored mean and variance are used
            (both of them, a single supplied value is discarded).
        """
        assert p.shape == s.shape, ' Expected components be same size but got: {} and {}'.format(p.shape, s.shape)

        if mean is None or variance is None:
            mean = self.stat.mean
            variance = self.stat.variance

        ps = np.stack([p, s], axis=0)
        return self._reconstruct(ps, mean, variance, self.u_recon)

    def reset(self, init_trajectory):
        """
        Resets model parameters and trajectories given initial data.

        Args:
            init_trajectory:    initial time-series observations of size [2, num_points]
        """
        # Statistics go first: decomposition below is done w.r.t. freshly reset mean and variance.
        _ = self.stat.reset(init_trajectory)
        p_data, s_data = self.decompose(init_trajectory)
        self.p.reset(p_data)
        self.s.reset(s_data)
        residuals = np.stack(
            [self.p.process.estimator.residuals, self.s.process.estimator.residuals],
            axis=0
        )
        _ = self.ps_stat.reset(residuals)

    def update(self, trajectory, disjoint=False):
        """
        Updates model parameters and trajectories given new data.

        Args:
            trajectory: time-series update observations of size [2, num_points], where:
                        num_points <= min{p_params[max_length], s_params[max_length]} is necessary
                        to keep model trajectory continuous
            disjoint:   bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        _ = self.stat.update(trajectory)  # todo: this stat.estimator does not respect `disjoint` arg.; ?!!
        p_data, s_data = self.decompose(trajectory)
        self.p.update(p_data, disjoint)
        self.s.update(s_data, disjoint)
        residuals = np.stack(
            [self.p.process.estimator.residuals, self.s.process.estimator.residuals],
            axis=0
        )
        _ = self.ps_stat.update(residuals)

    def transform(self, trajectory=None, state=None, size=None):
        """
        Returns per-component analyzer data decomposition.

        Args:
            trajectory:     bivariate data to decompose of size [2, num_points] or None
            state:          instance of BivariateTSModelState or None; required whenever `trajectory` is given
            size:           uint, size of decomposition to get, or None

        Returns:
            array of [size or num_points], array of [size or num_points], ZscoreState(2)

            - SSA transformations of P, S components of given trajectory w.r.t. given state
            - bivariate trajectory statistics (means and variances)

        Notes:
            if no `trajectory` is given - returns stored data decomposition
            if no `state` arg. is given - uses stored analyzer state.
            if no 'size` arg is given - decomposes full [stored or given] trajectory
        """
        if state is not None:
            assert isinstance(state, BivariateTSModelState),\
                'Expected `state as instance of BivariateTSModelState but got: {}`'.format(type(state))
            s_state = state.s
            p_state = state.p
            stat = state.stat

        else:
            assert trajectory is None, 'When `trajectory` arg. is given, `state` is required'
            p_state = None
            s_state = None
            stat = self.stat.get_state()

        if trajectory is not None:
            ps_data = self._decompose(trajectory, stat.mean, stat.variance, self.u_decomp)
            p_data = ps_data[0, :]
            s_data = ps_data[1, :]

        else:
            p_data = None
            s_data = None

        p_transformed = self.p.transform(p_data, p_state, size)
        s_transformed = self.s.transform(s_data, s_state, size)

        return p_transformed, s_transformed, stat

    def get_trajectory(self, size=None, reconstruct=True):
        """
        Returns stored decomposition fragment and [optionally] time-series reconstruction.
        TODO: reconstruction is freaky due to only last stored statistic is used

        Args:
            size:           uint, fragment length to get in [1, ..., max_length] or None
            reconstruct:    bool, if True - also return data reconstruction

        Returns:
            array of [size ... max_length], array of [size ... max_length], array of size [2, size ... max_length]
            or
            array of [size ... max_length], array of [size ... max_length], None

            P, S [, and 2D trajectory] series as [ x[-size], x[-size+1], ... x[-1] ], up to length [size];
            if no `size` arg. is given - returns entire stored trajectory, up to length [max_length].
        """
        p_data = self.p.get_trajectory(size)
        s_data = self.s.get_trajectory(size)

        if reconstruct:
            trajectory = self.reconstruct(p_data, s_data)

        else:
            trajectory = None

        return p_data, s_data, trajectory

    @staticmethod
    def generate_trajectory_fn(batch_size, size, state, reconstruct=False, u_recon=None):
        """
        Generates batch of time-series realisations given model state.
        P and S realisations are drawn via the univariate generator, with process parameters
        sampled separately for every trajectory in a batch.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState;
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise
            u_recon:        reconstruction matrix of size [2, 2] or None; required if reconstruct=True;

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        assert isinstance(state, BivariateTSModelState), \
            'Expected `state` as instance of BivariateTSModelState, got: {}'.format(type(state))

        if reconstruct:
            assert u_recon is not None, 'reconstruct=True but reconstruction matrix is not provided.'

        # Unpack:
        p_state = state.p.process
        s_state = state.s.process

        # Get all samples for single batch (faster):
        p_params = OUProcess.sample_naive_unbiased(p_state, batch_size)
        s_params = OUProcess.sample_naive_unbiased(s_state, batch_size)

        # Concatenate batch-wise:
        parameters = OUEstimatorState(
            mu=np.concatenate([p_params.mu, s_params.mu]),
            log_theta=np.concatenate([p_params.log_theta, s_params.log_theta]),
            log_sigma=np.concatenate([p_params.log_sigma, s_params.log_sigma]),
        )
        driver_df = np.concatenate(
            [
                np.tile(p_state.driver_df, batch_size),
                np.tile(s_state.driver_df, batch_size),
            ]
        )
        # Access multivariate generator_fn directly to get batch of bivariate OU:
        batch_2x = OUProcess.generate_trajectory_fn(2 * batch_size, size, parameters, driver_df)
        # First half of the double batch holds P realisations, second half - S ones:
        batch_2x = np.reshape(batch_2x, [2, batch_size, -1])
        batch_2x = np.swapaxes(batch_2x, 0, 1)

        if reconstruct:
            x = np.matmul(u_recon, batch_2x) * state.stat.variance[None, :, None] ** .5 \
                + state.stat.mean[None, :, None]

        else:
            x = None

        return batch_2x, x

    @staticmethod
    def generate_bivariate_trajectory_fn(batch_size, size, state, reconstruct=False, u_recon=None):
        """
        Generates batch of time-series realisations given model state.
        P and S realisations are drawn via the multivariate generator using `state.ps_stat.covariance`;
        process parameters are sampled once and shared by the entire batch.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState;
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise
            u_recon:        reconstruction matrix of size [2, 2] or None; required if reconstruct=True;

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        assert isinstance(state, BivariateTSModelState), \
            'Expected `state` as instance of BivariateTSModelState, got: {}'.format(type(state))

        if reconstruct:
            assert u_recon is not None, 'reconstruct=True but reconstruction matrix is not provided.'

        # Unpack:
        p_state = state.p.process
        s_state = state.s.process

        # Single parameters sample per component:
        p_params = OUProcess.sample_naive_unbiased(p_state, 1)
        s_params = OUProcess.sample_naive_unbiased(s_state, 1)

        # Concatenate component-wise:
        parameters = OUEstimatorState(
            mu=np.concatenate([p_params.mu, s_params.mu]),
            log_theta=np.concatenate([p_params.log_theta, s_params.log_theta]),
            log_sigma=np.concatenate([p_params.log_sigma, s_params.log_sigma]),
        )
        driver_df = np.asarray([p_state.driver_df, s_state.driver_df])

        # Access multivariate generator_fn directly to get batch of 2d correlated OU's:
        batch_2d = OUProcess.generate_multivariate_trajectory_fn(
            batch_size=batch_size,
            size=size,
            parameters=parameters,
            t_df=driver_df,
            covariance=state.ps_stat.covariance
        )
        # Generator output is [batch_size, size, 2], move process dimension to the middle:
        batch_2d = np.swapaxes(batch_2d, 1, 2)

        if reconstruct:
            x = np.matmul(u_recon, batch_2d) * state.stat.variance[None, :, None] ** .5 \
                + state.stat.mean[None, :, None]

        else:
            x = None

        return batch_2d, x

    def generate(self, batch_size, size, state=None, reconstruct=True):
        """
        Generates batch of time-series realisations given model state.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState or None;
                            if no state provided - current state is used.
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        if state is None:
            # Fit student-t df:
            _ = self.p.process.driver_estimator.fit()
            _ = self.s.process.driver_estimator.fit()

            state = self.get_state()

        # return self.generate_trajectory_fn(batch_size, size, state, reconstruct, self.u_recon)
        return self.generate_bivariate_trajectory_fn(batch_size, size, state, reconstruct, self.u_recon)
class OUProcess:
    """
    Provides essential functionality for recursive time series modeling
    as Ornstein-Uhlenbeck stochastic process:
    parameters estimation, state filtering and sampling, trajectories generation.
    """

    def __init__(self, alpha=None, filter_alpha=None):
        """

        Args:
            alpha:          decaying factor passed to parameters estimator, driver estimator and statistics tracker
            filter_alpha:   decaying factor passed to state-space trajectory filter
        """
        self.alpha = alpha
        self.filter_alpha = filter_alpha
        self.estimator = OUEstimator(alpha)

        # Just use exponential smoothing as state-space trajectory filter:
        self.filter = Covariance(3, alpha=filter_alpha)

        # Driver is Student-t:
        self.driver_estimator = STEstimator(alpha)

        # Empirical statistics tracker (mostly for accuracy checking, not included in OUProcessState):
        self.stat = Zscore(1, alpha)

        # Set by reset(); checked by ready():
        self.is_ready = False

    def ready(self):
        """
        Checks that process has been initialized via reset().

        Raises:
            AssertionError: if reset() has not been called yet.
        """
        assert self.is_ready, 'OUProcess is not initialized. Hint: forgot to call .reset()?'

    def get_state(self):
        """
        Returns model state tuple.

        Returns:
            current state as instance of OUProcessState
        """
        self.ready()
        return OUProcessState(
            observation=self.estimator.get_state(),
            filtered=self.filter.get_state(),
            driver_df=self.driver_estimator.df,
        )

    @staticmethod
    def get_random_state(mu=(0, 0), theta=(.1, 1), sigma=(0.1, 1), driver_df=(3, 50), variance=1e-2):
        """
        Samples random uniform process state w.r.t. parameters intervals given.

        Args:
            mu:         iterable of floats as [lower_bound, upper_bound], OU Mu sampling interval
            theta:      iterable of positive floats as [lower_bound, upper_bound], OU Theta sampling interval
            sigma:      iterable of positive floats as [lower_bound, upper_bound], OU Sigma sampling interval
            driver_df:  iterable of positive floats as [lower_bound > 2.999, upper_bound],
                        student-t driver degrees of freedom sampling interval
            variance:   filtered observation variance (same fixed for all params., covariance assumed diagonal)

        Returns:
            instance of OUProcessState

        Raises:
            AssertionError: if any interval is not an ordered 1d pair or its lower bound is too small.
        """
        sample = dict()
        for name, param, low_threshold in zip(
                ['mu', 'theta', 'sigma', 'driver_df'],
                [mu, theta, sigma, driver_df],
                [-np.inf, 1e-8, 1e-8, 2.999],
        ):
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] > low_threshold, \
                'Expected param `{}` lower bound be bigger than {}, got: {}'.format(name, low_threshold, interval[0])
            sample[name] = np.random.uniform(low=interval[0], high=interval[-1])

        # Theta and Sigma are stored as logarithms:
        observation = OUEstimatorState(
            mu=sample['mu'],
            log_theta=np.log(sample['theta']),
            log_sigma=np.log(sample['sigma'])
        )
        filtered = CovarianceState(
            mean=np.asarray(observation),
            variance=np.ones(3) * variance,
            covariance=np.eye(3) * variance,
        )
        return OUProcessState(
            observation=observation,
            filtered=filtered,
            driver_df=sample['driver_df'],
        )

    def fit_driver(self, trajectory=None):
        """
        Updates Student-t driver shape parameter. Needs entire trajectory for correct estimation.
        TODO: make recursive update.

        Args:
            trajectory: full observed data of size ~[max_length] or None

        Returns:
            Estimated shape parameter.
        """
        self.ready()
        driver_df, _, _ = self.driver_estimator.fit(trajectory)
        return driver_df

    def reset(self, init_trajectory):
        """
        Resets model parameters for process dX = -Theta *(X - Mu) + Sigma * dW
        and starts new trajectory given initial data.

        Args:
            init_trajectory:    initial 1D process observations trajectory of size [num_points]
        """
        _ = self.stat.reset(init_trajectory[None, :])

        init_observation = np.asarray(self.estimator.reset(init_trajectory))

        # 2x observation to get initial covariance matrix estimate:
        init_observation = np.stack([init_observation, init_observation], axis=-1)
        _ = self.filter.reset(init_observation)

        self.driver_estimator.reset(self.estimator.residuals)

        self.is_ready = True

    def update(self, trajectory, disjoint=False):
        """
        Updates model parameters estimates for process dX = -Theta *(X - Mu) + Sigma * dW
        given new observations.

        Args:
            trajectory:  1D process observations trajectory update of size [num_points]
            disjoint:    bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        self.ready()
        _ = self.stat.update(trajectory[None, :])  # todo: disjoint is ignored or reset stat?

        # Get new state-space observation:
        observation = self.estimator.update(trajectory, disjoint)

        # Smooth and make it random variable:
        _ = self.filter.update(np.asarray(observation)[:, None])

        # Residuals distr. shape update but do not fit:
        self.driver_estimator.update(self.estimator.residuals)

    @staticmethod
    def _pack_parameter_sample(sample):
        """
        Packs drawn parameters into OUEstimatorState of per-parameter vectors.

        Args:
            sample: array of size [size, 3], or [3] for a single draw (size=None)

        Returns:
            OUEstimatorState with `mu`, `log_theta`, `log_sigma` vectors of size [size] (or [1]) each
        """
        # Single draw (size=None) comes as 1d vector; promote it so column indexing below is valid:
        sample = np.atleast_2d(sample)
        return OUEstimatorState(
            mu=sample[:, 0],
            log_theta=sample[:, 1],
            log_sigma=sample[:, 2],
        )

    @staticmethod
    def _broadcast_driver_df(t_df, batch_size):
        """
        Brings driver shape parameter to vector of size [batch_size].

        Args:
            t_df:       scalar (python or numpy) or array_like of size [batch_size]
            batch_size: uint, expected vector length

        Returns:
            array of size [batch_size]; scalar input is repeated, vector input is validated and passed through

        Raises:
            AssertionError: if vector input length does not match `batch_size`.
        """
        t_df = np.asarray(t_df)
        if t_df.ndim == 0:
            return np.tile(t_df, batch_size)

        assert t_df.shape[0] == batch_size, \
            'Given `t_df` parameters length: {} and `batch_size`: {} does not match.'.format(t_df.shape[0], batch_size)
        return t_df

    @staticmethod
    def sample_from_filtered(filter_state, size=1):
        """
        Samples process parameters values given smoothed observations.
        Static method, can be used as stand-alone function.

        Args:
            filter_state:  instance of CovarianceState of dimensionality 3
            size:          int or None, number of samples to draw; None yields a single sample

        Returns:
            sampled process parameters of size [size] each ([1] if size is None), packed as OUEstimatorState tuple
        """
        assert isinstance(filter_state, CovarianceState),\
            'Expected filter_state as instance of CovarianceState, got: {}'.format(type(filter_state))

        sample = np.random.multivariate_normal(filter_state.mean, filter_state.covariance, size=size)

        return OUProcess._pack_parameter_sample(sample)

    @staticmethod
    def sample_naive_unbiased(state, size=1):
        """
        Samples process parameters values given observed values and smoothed covariance.
        Distribution is centered at latest observation rather than at filtered mean.
        Static method, can be used as stand-alone function.

        Args:
            state:  instance of OUProcessState
            size:   int or None, number of samples to draw; None yields a single sample

        Returns:
            sampled process parameters of size [size] each ([1] if size is None), packed as OUEstimatorState tuple
        """
        assert isinstance(state, OUProcessState), \
            'Expected filter_state as instance of `OUProcessState`, got: {}'.format(type(state))

        # naive_mean = (np.asarray(state.observation) + state.filtered.mean) / 2
        naive_mean = np.asarray(state.observation)
        sample = np.random.multivariate_normal(naive_mean, state.filtered.covariance, size=size)

        return OUProcess._pack_parameter_sample(sample)

    def sample_parameters(self, state=None, size=1):
        """
        Samples process parameters values given process state;

        Args:
            state:  instance of OUProcessState or None;
                    if no state provided - current state is used;
            size:   number of samples to draw;

        Returns:
            sampled process parameters of size [size] each, packed as OUEstimatorState tuple
        """
        if state is None:
            state = self.get_state()

        else:
            assert isinstance(state, OUProcessState),\
                'Expected state as instance of OUProcessState, got: {}'.format(type(state))

        # return self.sample_from_filtered(state.filtered, size=size)
        return self.sample_naive_unbiased(state, size=size)

    @staticmethod
    def generate_trajectory_fn(batch_size, size, parameters, t_df):
        """
        Generates batch of univariate process realisations given process parameters.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            parameters:     instance of OUEstimatorState of size [batch_size] for each parameter
            t_df:           driver shape param.: scalar > 3.0 (shared by entire batch)
                            or array_like of size [batch_size]

        Returns:
            process realisations as 2d array of size [batch_size, size]
        """
        assert isinstance(parameters, OUEstimatorState), \
            'Expected `parameters` as instance of OUEstimatorState, got: {}'.format(type(parameters))

        for param in parameters:
            assert param.shape[0] == batch_size,\
                'Given `parameters` length: {} and `batch_size`: {} does not match.'.format(param.shape[0], batch_size)

        t_df = OUProcess._broadcast_driver_df(t_df, batch_size)

        # Every trajectory starts at its own Mu value:
        trajectory = ou_process_t_driver_batch_fn(
            size,
            mu=parameters.mu,
            l=np.exp(parameters.log_theta),
            sigma=np.exp(parameters.log_sigma),
            df=t_df,
            x0=parameters.mu,
        )
        return trajectory.T

    @staticmethod
    def generate_multivariate_trajectory_fn(batch_size, size, parameters, t_df, covariance):
        """
        Generates batch of realisations of multivariate Ornstein-Uhlenbeck process.
        Note differences in parameters dimensionality w.r.t. univariate case!
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            parameters:     instance of OUEstimatorState of size [process_dim] for each parameter
            t_df:           array_like, driver shape param. vector of size [process_dim]
            covariance:     process innovations covariance matrix of size [process_dim, process_dim]

        Returns:
            process realisations as array of size [batch_size, size, process_dim]
        """
        assert isinstance(parameters, OUEstimatorState), \
            'Expected `parameters` as instance of OUEstimatorState, got: {}'.format(type(parameters))

        trajectory = multivariate_ou_process_t_driver_batch_fn(
            batch_size=batch_size,
            num_points=size,
            mu=parameters.mu,
            theta=np.exp(parameters.log_theta),
            sigma=np.exp(parameters.log_sigma),
            cov=covariance,
            df=t_df,
            x0=parameters.mu,
        )
        return trajectory

    def generate(self, batch_size, size, state=None, driver_df=None):
        """
        Generates batch of realisations given process state.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of OUProcessState or None;
                            if no state provided - current state is used.
            driver_df:      t-student process driver degree of freedom parameter or None;
                            if no value provided - driver estimator is fitted and its estimate is used;

        Returns:
            process realisations of size [batch_size, size]
        """
        self.ready()
        parameters = self.sample_parameters(state, size=batch_size)

        if driver_df is None:
            driver_df, _, _ = self.driver_estimator.fit()

        return self.generate_trajectory_fn(batch_size, size, parameters, driver_df)