Python Zscore.update Exemples, btgym.research.model_based.model.rec.Zscore.update Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : base.py Projet : mysl/btgym

class BaseStrategy6(bt.Strategy):
    """
    Added for gen.6:
        traded asset volatility-based rescaling for all broker statistics and, consequently, reward fn
        self.p.norm_alpha - tracking smoothing decay parameter added
        self.p.target_call  - upper limit arg. is removed
        TODO: auto sizer inference, co-integration coeff. inference

    Controls Environment inner dynamics and backtesting logic. Provides gym'my (State, Action, Reward, Done, Info) data.
    Any State, Reward and Info computation logic can be implemented by subclassing BTgymStrategy and overriding
    get_[mode]_state(), get_reward(), get_info(), is_done() and set_datalines() methods.
    One can always go deeper and override __init__ () and next() methods for desired
    server cerebro engine behaviour, including order execution logic etc.

    Note:
        - base class supports single asset iteration via default data_line named 'base_asset', see derived classes
          multi-asset support
        - bt.observers.DrawDown observer will be automatically added to BTgymStrategy instance at runtime.
        - Since it is bt.Strategy subclass, refer to https://www.backtrader.com/docu/strategy.html for more information.
    """
    # Time embedding period (length of the first dimension of the `raw` observation):
    time_dim = 32  # NOTE: if changed, also adjust Policy UNREAL aux. pixel control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period
    # (note that __init__() additionally asserts avg_period + 2 < time_dim):
    avg_period = int(time_dim / 2)

    # Possible agent actions; NOTE: place 'hold' first!
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    # Empty by default; both values are forwarded to `params` below.
    features_parameters = ()
    num_features = len(features_parameters)

    params = dict(
        # Observation state shape is a dictionary of Gym spaces,
        # at least should contain `raw` field.
        # By convention first dimension of every Gym Box space is time embedding one;
        # one can define any shape; should match env.observation_space.shape.
        # Observation space state min/max values:
        # for `raw` (default) - absolute min/max values from BTgymDataset will be used.
        state_shape={
            'raw':
            spaces.Box(
                shape=(time_dim, 4),
                low=0,  # will get overridden.
                high=0,
                dtype=np.float32,
            ),
            'internal':
            spaces.Box(low=-100,
                       high=100,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'stat':
            spaces.Box(low=-100, high=100, shape=(2, 1), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,  # fi_gamma, should match MDP gamma decay
        reward_scale=1.0,  # reward multiplier
        norm_alpha=0.001,  # renormalisation tracking decay in (0, 1]
        drawdown_call=
        10,  # finish episode when hitting drawdown threshold, in percent of initial cash.
        dataset_stat=
        None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=
        avg_period,  # number of time steps reward estimation statistics are tracked over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=
        1,  # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        """
        Sets up episode bookkeeping, broker configuration, order stakes, broker statistics
        accumulators, state-composing method lookups and the action handling mode.

        Keyword Args:
            params (dict):          parameters dictionary, see Note below.

            Notes:
                Due to backtrader convention, any strategy arguments should be defined inside `params` dictionary
                or passed as kwargs to bt.Cerebro() class via .addstrategy() method. Parameter dictionary
                should contain at least these keys::

                    state_shape:        Observation state shape is dictionary of Gym spaces, by convention
                                        first dimension of every Gym Box space is time embedding one;
                    cash_name:          str, name for cash asset
                    asset_names:        iterable of str, names for assets
                    start_cash:         float, broker starting cash
                    commission:         float, broker commission value, .01 stands for 1%
                    leverage:           float, broker leverage
                    slippage:           float, broker execution slippage
                    order_size:         dict of fixed order stakes (floats); keys should match assets names;
                                        a single int or float is expanded to every data name.
                    drawdown_call:      finish episode when hitting this drawdown threshold, in percent.
                    portfolio_actions:  possible agent actions; `None` or an empty collection selects
                                        continuous action handling.
                    skip_frame:         number of environment steps to skip before returning next response,
                                        e.g. if set to 10 -- agent will interact with environment every 10th step;
                                        every other step agent action is assumed to be 'hold'.

                Default values are those of the class-level `params` dictionary.

        Raises:
            AssertionError:         if `avg_period + 2 < time_dim` does not hold.
            ValueError:             if order stake can not be inferred, or if continuous actions are requested
                                    with `skip_frame` <= 1.
            NotImplementedError:    if a `get_broker_[line]()` or `get_[mode]_state()` method is missing.
        """
        # Inherit logger from cerebro:
        self.log = self.env._log

        # Kept as an assertion so the exception type seen by existing callers does not change:
        assert self.p.avg_period + 2 < self.p.time_dim, 'Doh!'

        self.skip_frame = self.p.skip_frame

        self.iteration = 0
        self.pre_iteration = 0
        self.env_iteration = 0
        self.inner_embedding = 1
        self.is_done = False
        self.is_done_enabled = False
        self.steps_till_is_done = 2  # extra steps to make when episode terminal conditions are met
        self.action = self.p.initial_portfolio_action
        self.action_to_repeat = self.p.initial_portfolio_action
        self.action_repeated = 0
        self.num_action_repeats = None
        self.reward = 0
        self.order = None
        self.order_failed = 0
        self.broker_message = '_'
        self.final_message = '_'
        self.raw_state = None
        self.time_stamp = 0

        # Prepare broker:
        if self.p.start_cash is not None:
            self.env.broker.setcash(self.p.start_cash)

        if self.p.commission is not None:
            self.env.broker.setcommission(commission=self.p.commission,
                                          leverage=self.p.leverage)

        if self.p.slippage is not None:
            # Bid/ask workaround: set overkill 10% slippage + slip_out=False
            # ensuring we always buy at current 'high'~'ask' and sell at 'low'~'bid':
            self.env.broker.set_slippage_perc(self.p.slippage,
                                              slip_open=True,
                                              slip_match=True,
                                              slip_out=False)

        # Try to define stake, if no self.p.order_size dict has been set:
        if self.p.order_size is None:
            # If no order size has been set for every data_line,
            # try to infer stake size from sizer set by bt.Cerebro.addsizer() method.
            # Explicit checks are used instead of `assert` so validation survives `python -O`:
            sizer_specs = list(self.env.sizers.values())
            env_sizer_params = None
            if len(sizer_specs) == 1:
                env_sizer_params = sizer_specs[0][-1]  # pull dict of outer set sizer params

            if env_sizer_params is None or 'stake' not in env_sizer_params.keys():
                msg = 'Order stake is not set neither via strategy.param.order_size nor via bt.Cerebro.addsizer method.'
                self.log.error(msg)
                raise ValueError(msg)

            self.p.order_size = {
                name: env_sizer_params['stake']
                for name in self.p.asset_names
            }

        elif isinstance(self.p.order_size, (int, float)):
            # Single number given: use the same stake for every data name:
            unimodal_stake = {
                name: self.p.order_size
                for name in self.getdatanames()
            }
            self.p.order_size = unimodal_stake

        # Current effective order sizes:
        self.current_order_sizes = None

        # Current stat normalisation:
        self.normalizer = 1.0

        self.trade_just_closed = False
        self.trade_result = 0

        self.unrealized_pnl = None
        self.norm_broker_value = None
        self.realized_pnl = None

        self.current_pos_duration = 0
        self.current_pos_min_value = 0
        self.current_pos_max_value = 0

        self.realized_broker_value = self.env.broker.startingcash
        self.episode_result = 0  # not used

        # Service sma to get correct first features values:
        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0],
                                                      period=self.p.time_dim)
        self.data.dim_sma.plotinfo.plot = False

        # Episode-wide metadata:
        self.metadata = {
            'type': np.asarray(self.p.metadata['type']),
            'trial_num': np.asarray(self.p.metadata['parent_sample_num']),
            'trial_type': np.asarray(self.p.metadata['parent_sample_type']),
            'sample_num': np.asarray(self.p.metadata['sample_num']),
            'first_row': np.asarray(self.p.metadata['first_row']),
            'timestamp': np.asarray(self.time_stamp, dtype=np.float64)
        }
        self.state = {'raw': None, 'metadata': None}

        # If it is train or test episode?
        # default logic: true iff. it is test episode from target domain:
        self.is_test = self.metadata['type'] and self.metadata['trial_type']

        # This flag shows to the outer world if this episode can broadcast world-state information, e.g. move global
        # time forward (see: btgym.server._BTgymAnalyzer.next() method);
        self.can_broadcast = self.is_test

        self.log.debug('strategy.metadata: {}'.format(self.metadata))
        self.log.debug('is_test: {}'.format(self.is_test))

        # Broker data lines of interest (used for estimation of inner state of agent):
        self.broker_datalines = [
            'cash',
            'value',
            'exposure',
            'drawdown',
            'pos_duration',
            'realized_pnl',
            'unrealized_pnl',
            'min_unrealized_pnl',
            'max_unrealized_pnl',
            'total_unrealized_pnl',
        ]
        # Define flat collection dictionary looking up for methods for estimating broker statistics,
        # one method for one mode, should be named .get_broker_[mode_name]():
        self.collection_get_broker_stat_methods = {}
        for line in self.broker_datalines:
            try:
                self.collection_get_broker_stat_methods[line] = getattr(
                    self, 'get_broker_{}'.format(line))

            except AttributeError:
                raise NotImplementedError(
                    'Callable get_broker_{}.() not found'.format(line))

        # Broker and account related sliding statistics accumulators:
        self.broker_stat = {
            key: np.zeros(self.avg_period)
            for key in self.broker_datalines
        }

        # This data line will be used by default to
        # define normalisation bounds (can be overridden via .set_datalines()):
        self.stat_asset = self.data.open

        # Add custom data Lines if any [and possibly redefine stat_asset and order_size_normalizer]:
        self.set_datalines()

        # Normalisation statistics estimator (updated via update_broker_stat.()):
        self.norm_stat_tracker = Zscore(1, alpha=self.p.norm_alpha)
        self.normalisation_state = NormalisationState(0, 0, .9, 1.1)

        # State exp. smoothing params:
        self.internal_state_discount = np.cumprod(
            np.tile(1 - 1 / self.p.avg_period, self.p.avg_period))[::-1]
        self.external_state_discount = None  # not used

        # Define flat collection dictionary looking for methods for estimating observation state,
        # one method per one mode, should be named .get_[mode_name]_state():
        self.collection_get_state_methods = {}
        for key in self.p.state_shape.keys():
            try:
                self.collection_get_state_methods[key] = getattr(
                    self, 'get_{}_state'.format(key))

            except AttributeError:
                raise NotImplementedError(
                    'Callable get_{}_state.() not found'.format(key))

        for data in self.datas:
            self.log.debug('data_name: {}'.format(data._name))

        self.log.debug('stake size: {}'.format(self.p.order_size))

        # Define how this strategy should handle actions: either as discrete or continuous.
        # NOTE: the emptiness test uses len(); comparing a set with `{}` (an empty dict) is always False.
        if self.p.portfolio_actions is None or len(
                self.p.portfolio_actions) == 0:
            # No discrete actions provided, assume continuous:
            if not self.p.skip_frame > 1:
                msg = 'For continuous actions it is essential to set `skip_frame` parameter > 1, got: {}'.format(
                    self.p.skip_frame)
                self.log.error(msg)
                raise ValueError(msg)
            # Disable broker checking margin,
            # see: https://community.backtrader.com/topic/152/multi-asset-ranking-and-rebalancing/2?page=1
            self.env.broker.set_checksubmit(False)
            self.next_process_fn = self._next_target_percent
            # Repeat action 2 times:
            self.num_action_repeats = 2

        else:
            # Use discrete handling method otherwise:
            self.env.broker.set_checksubmit(True)
            self.next_process_fn = self._next_discrete
            # Do not repeat action for discrete:
            self.num_action_repeats = 0

    def prenext(self):
        """
        Warm-up step handler.

        Once the warm-up counter reaches `time_dim - avg_period` the normalisation tracker is reset
        with the `stat_asset` history; on every later warm-up step broker statistics are updated.
        The warm-up counter is incremented on every call.
        """
        warmup_mark = self.pre_iteration + 2
        tracking_start = self.p.time_dim - self.avg_period

        if warmup_mark > tracking_start:
            self.update_broker_stat()

        elif warmup_mark == tracking_start:
            # Seed tracker with all stat_asset values buffered so far:
            history = np.asarray(
                self.stat_asset.get(size=self.data.close.buflen()))
            self.norm_stat_tracker.reset(history[None, :])

        self.pre_iteration += 1

    def nextstart(self):
        """
        Stores the buffer length of the `close` data line as `inner_embedding`.
        """
        close_line = self.data.close
        self.inner_embedding = close_line.buflen()

    def next(self):
        """
        Default implementation for built-in backtrader method.
        Defines one step environment routine:
        updates broker statistics, then handles order execution logic according to action received.

        If `self.action` carries the '_skip_this' key, the last processed action is re-submitted
        until it has been repeated `num_action_repeats` times; otherwise `self.action` is processed
        and remembered as the action to repeat.

        Note that orders can only be submitted for data_lines in action_space (assets).
        `self.action` attr. is updated by btgym.server._BTgymAnalyzer.
        """
        self.update_broker_stat()

        if '_skip_this' not in self.action.keys():
            # Fresh action: process it and restart the repeat counter.
            self.next_process_fn(self.action)
            self.action_repeated = 0
            self.action_to_repeat = self.action
            return

        # Skipped frame: repeat the stored action while repeats are left.
        if self.action_repeated < self.num_action_repeats:
            self.next_process_fn(self.action_to_repeat)
            self.action_repeated += 1

    def notify_trade(self, trade):
        """
        Records results of a closed trade.

        Args:
            trade:  trade object exposing `isclosed` and `pnlcomm` attributes

        Note:
            `trade_just_closed` flag and `trade_result` accumulator are reset by update_broker_stat().
        """
        if not trade.isclosed:
            return

        # Flag the closure and accumulate its result (incl. commission):
        self.trade_just_closed = True
        self.trade_result += trade.pnlcomm

        # Store realized portfolio value:
        self.realized_broker_value = self.broker.get_value()

    def update_broker_stat(self):
        """
        Pushes latest-step values into every sliding broker statistics accumulator.

        For each line in `collection_get_broker_stat_methods` the corresponding `get_broker_[line]()`
        method is called with current account value, open positions, total exposure, normalisation
        bounds and normalisation constant; the oldest accumulator entry is dropped and the new value
        is appended. One-time trade flags are reset afterwards.
        """
        broker = self.env.broker

        # Current account value:
        current_value = broker.get_value()

        # Normalisation bounds (this also updates the tracker):
        norm_state = self.get_normalisation()

        # Individual positions for each instrument traded and total absolute position size:
        positions = [broker.getposition(data) for data in self.datas]
        exposure = sum(abs(pos.size) for pos in positions)

        # Tracking normalisation constant: inverse width of normalisation interval, guarded against zero width:
        interval_width = norm_state.up_interval - norm_state.low_interval
        self.normalizer = 1 / np.clip(interval_width, 1e-8, None)

        call_kwargs = dict(
            current_value=current_value,
            positions=positions,
            exposure=exposure,
            lower_bound=norm_state.low_interval,
            upper_bound=norm_state.up_interval,
            normalizer=self.normalizer,
        )
        for key, method in self.collection_get_broker_stat_methods.items():
            latest = float(method(**call_kwargs))
            # Shift accumulator window by one step:
            shifted = self.broker_stat[key][1:]
            self.broker_stat[key] = np.concatenate(
                [shifted, np.asarray([latest])])

        # Reset one-time flags:
        self.trade_just_closed = False
        self.trade_result = 0

    def get_normalisation(self):
        """
        Feeds the latest `stat_asset` value to the normalisation tracker and
        refreshes `normalisation_state` attr. from the returned mean and variance.

        Returns:
            instance of NormalisationState tuple
        """
        # Latest value with a leading axis added, as passed to the tracker:
        latest = np.asarray(self.stat_asset.get(size=1))[None, :]
        mean, var = self.norm_stat_tracker.update(latest)

        # Keep variance strictly positive:
        var = np.clip(var, 1e-8, None)

        # Use 99% N(stat_data_mean, stat_data_std) intervals as normalisation interval:
        lower, upper = stats.norm.interval(.99, mean, var**.5)

        self.normalisation_state = NormalisationState(
            mean=float(mean),
            variance=float(var),
            low_interval=lower[0],
            up_interval=upper[0])
        return self.normalisation_state

    def get_order_sizes(self):
        """
        Estimates current order sizes for assets in trade, sets `current_order_sizes` attribute.

        Returns:
            1D numpy array of floats, one entry per key of `self.p.order_size`
        """
        # Default implementation for fixed-size orders.
        # `np.float` alias was removed from NumPy (1.24+); np.float64 is the dtype it resolved to.
        self.current_order_sizes = np.fromiter(self.p.order_size.values(),
                                               dtype=np.float64)
        return self.current_order_sizes

    def get_broker_value(self, current_value, normalizer, **kwargs):
        """
        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant (accepted but not used here)

        Returns:
            relative change of portfolio value w.r.t. broker starting cash, divided by leverage.
        """
        start_cash = self.env.broker.startingcash
        relative_change = (current_value - start_cash) / start_cash
        return relative_change / self.p.leverage

    def get_broker_cash(self, current_value, **kwargs):
        """
        Args:
            current_value:    float, current portfolio value

        Returns:
            broker cash as a fraction of current portfolio value.
        """
        cash = self.env.broker.get_cash()
        return cash / current_value

    def get_broker_exposure(self, exposure, normalizer, **kwargs):
        """
        Args:
            exposure:       float, current total position exposure
            normalizer:     float, normalisation constant

        Returns:
            exposure (position size) scaled by normalisation constant.
        """
        scaled_exposure = exposure * normalizer
        return scaled_exposure

    def get_broker_realized_pnl(self, normalizer, **kwargs):
        """
        Args:
            normalizer:     float, normalisation constant

        Returns:
            accumulated trade result scaled by normalisation constant if a trade has just been closed,
            0.0 otherwise.
        """
        if not self.trade_just_closed:
            return 0.0

        return self.trade_result * normalizer

    def get_broker_unrealized_pnl(self, current_value, normalizer, **kwargs):
        """
        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            difference between current and last realized portfolio value, scaled by normalisation constant.
        """
        open_trade_delta = current_value - self.realized_broker_value
        return open_trade_delta * normalizer

    def get_broker_total_unrealized_pnl(self, current_value, normalizer,
                                        **kwargs):
        """
        REDUNDANT

        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant (accepted but not used here)

        Returns:
            difference between current portfolio value and broker starting cash,
            multiplied by broker starting cash.
        """
        # NOTE(review): result is multiplied by starting cash and `normalizer` is ignored, unlike the
        # other *_pnl estimators -- confirm whether scaling by `normalizer` was intended.
        start_cash = self.env.broker.startingcash
        return (current_value - start_cash) * self.env.broker.startingcash

    def get_broker_drawdown(self, **kwargs):
        """
        Returns:
            latest drawdown observer value divided by `drawdown_call` parameter,
            or 0.0 if the lookup raises IndexError.
        """
        try:
            return self.stats.drawdown.drawdown[-1] / self.p.drawdown_call

        except IndexError:
            return 0.0

    def get_broker_pos_duration(self, exposure, **kwargs):
        """
        Updates and returns the position holding counter.

        Args:
            exposure:   float, current total positions exposure

        Returns:
            int, number of ticks current position is being held (zero when exposure is zero)
        """
        # Counter restarts when exposure is zero, otherwise grows by one per call:
        self.current_pos_duration = (
            0 if exposure == 0 else self.current_pos_duration + 1)

        return self.current_pos_duration

    def get_broker_max_unrealized_pnl(self, current_value, exposure,
                                      normalizer, **kwargs):
        """
        Tracks the highest portfolio value seen while exposure is non-zero.

        Args:
            current_value:  float, current portfolio value
            exposure:       float, current total positions exposure
            normalizer:     float, normalisation constant

        Returns:
            best unrealised PnL achieved within current opened position:
            tracked maximum value minus last realized portfolio value, scaled by normalisation constant.
        """
        # Restart tracking at zero exposure, otherwise only ever raise the tracked maximum:
        if exposure == 0 or self.current_pos_max_value < current_value:
            self.current_pos_max_value = current_value

        best_delta = self.current_pos_max_value - self.realized_broker_value
        return best_delta * normalizer

    def get_broker_min_unrealized_pnl(self, current_value, exposure,
                                      normalizer, **kwargs):
        """
        Tracks the lowest portfolio value seen while exposure is non-zero.

        Args:
            current_value:  float, current portfolio value
            exposure:       float, current total positions exposure
            normalizer:     float, normalisation constant

        Returns:
            worst unrealised PnL achieved within current opened position:
            tracked minimum value minus last realized portfolio value, scaled by normalisation constant.
        """
        # Restart tracking at zero exposure, otherwise only ever lower the tracked minimum:
        if exposure == 0 or self.current_pos_min_value > current_value:
            self.current_pos_min_value = current_value

        worst_delta = self.current_pos_min_value - self.realized_broker_value
        return worst_delta * normalizer

    def set_datalines(self):
        """
        Hook for defining custom data lines, indicators, etc.; does nothing by default.

        Default datalines are: Open, Low, High, Close, Volume.
        Anything else should be explicitly defined by overriding this method.
        Invoked once by Strategy.__init__().
        """
        return None

    def get_raw_state(self):
        """
        Default state observation composer.

        Stacks the last `time_dim` values of open, high, low and close data lines and
        stores the result in `self.raw_state`.

        Returns:
             time-embedded environment state observation as [n, 4] numpy matrix, where:
                4 - number of signal features  == state_shape[1],
                n - time-embedding length  == state_shape[0] == <set by user>.

        Note:
            `self.raw_state` is used to render environment `human` mode and should not be modified.

        """
        # np.vstack is used instead of its deprecated alias np.row_stack (NumPy 2.0+ warns on the alias):
        self.raw_state = np.vstack((
            np.frombuffer(self.data.open.get(size=self.time_dim)),
            np.frombuffer(self.data.high.get(size=self.time_dim)),
            np.frombuffer(self.data.low.get(size=self.time_dim)),
            np.frombuffer(self.data.close.get(size=self.time_dim)),
        )).T

        return self.raw_state

    def get_stat_state(self):
        """
        Returns:
            current state of the normalisation statistics tracker as numpy array.
        """
        tracker_state = self.norm_stat_tracker.get_state()
        return np.asarray(tracker_state)

    def get_internal_state(self):
        """
        Composes broker-related part of the observation.

        Returns:
            numpy array made of discounted 'value', 'unrealized_pnl', 'realized_pnl', 'cash' and
            'exposure' accumulators stacked along last axis, with a singleton axis inserted at
            position 1 and values clipped to [-100, 100].
        """
        stat_lines = ('value', 'unrealized_pnl', 'realized_pnl', 'cash',
                      'exposure')

        # Use smoothed values:
        discounted = []
        for name in stat_lines:
            line = np.asarray(self.broker_stat[name])
            discounted.append(line * self.internal_state_discount)

        x_broker = np.stack(discounted, axis=-1)
        return np.clip(x_broker[:, None, :], -100, 100)

    def get_metadata_state(self):
        """
        Refreshes 'timestamp' entry of episode metadata with current data timestamp.

        Returns:
            metadata dictionary
        """
        timestamp = self._get_timestamp()
        self.metadata['timestamp'] = np.asarray(timestamp)
        return self.metadata

    def _get_time(self):
        """
        Retrieves current time point of the episode data.

        Returns:
            datetime object
        """
        datetime_line = self.data.datetime
        return datetime_line.datetime()

    def _get_timestamp(self):
        """
        Sets `time_stamp` attr. and returns current data timestamp.

        Returns:
            POSIX timestamp
        """
        current_time = self._get_time()
        self.time_stamp = current_time.timestamp()
        return self.time_stamp

    def _get_broadcast_info(self):
        """
        Transmits broadcasting message.

        Returns:
            result of get_broadcast_message(), or None if that call raises AttributeError
        """
        try:
            message = self.get_broadcast_message()

        except AttributeError:
            message = None

        return message

    def get_broadcast_message(self):
        """
        Placeholder to be overridden; default implementation has nothing to broadcast.

        Returns:
            dictionary or None
        """
        message = None
        return message

    def get_state(self):
        """
        Collects estimated values for every mode of observation space by calling methods from
        `collection_get_state_methods` dictionary, stores the result in `self.state`.
        As a rule, this method should not be modified; override or implement corresponding
        get_[mode]_state() methods instead, defining necessary calculations and returning
        properly shaped tensors for every space mode.

        Note:
            - broker statistics are not updated here; that is done by .next().
            - 'data' refers to bt.strategy datafeeds and should be treated as such.
                Datafeed Lines that are not default to BTgymStrategy should be explicitly defined by
                 __init__() or define_datalines().

        Returns:
            dictionary of per-mode state values
        """
        composed = {}
        for key, method in self.collection_get_state_methods.items():
            composed[key] = method()

        self.state = composed
        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);
        Potential FI_1 is current normalized unrealized profit/loss.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf

        Returns:
            reward value clipped to [-1e3, 1e3]; also stored in `self.reward`
        """
        # Sliding statistics are read as-is; they are refreshed by update_broker_stat().
        skip = self.p.skip_frame

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.broker_stat['unrealized_pnl'])
        current_pos_duration = int(self.broker_stat['pos_duration'][-1])

        # We want to estimate potential `fi = gamma*fi_prime - fi` of current opened position,
        # thus need to consider different cases given skip_frame parameter:
        if current_pos_duration == 0:
            # Set potential term to zero if there is no opened positions:
            f1 = 0

        else:
            if current_pos_duration < skip:
                # Position is younger than one skip period: no previous potential yet.
                fi_1 = 0
                fi_1_prime = np.average(unrealised_pnl[-current_pos_duration:])

            else:
                # Start of previous-step window depends on whether two full skip periods have passed:
                if current_pos_duration < 2 * skip:
                    window_start = -(skip + current_pos_duration)

                else:
                    window_start = -2 * skip

                fi_1 = np.average(unrealised_pnl[window_start:-skip])
                fi_1_prime = np.average(unrealised_pnl[-skip:])

            # Potential term:
            f1 = self.p.gamma * fi_1_prime - fi_1

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(
            self.broker_stat['realized_pnl'])[-skip:].sum()

        # Weights are subject to tune:
        raw_reward = (0.1 * f1 + 1.0 * realized_pnl) * self.p.reward_scale
        self.reward = np.clip(raw_reward, -1e3, 1e3)

        return self.reward

    def get_info(self):
        """
        Builds information part of environment response as a dictionary holding:
        iteration number, current datetime, last action, broker message,
        broker cash and value, current and maximum drawdown.
        Can be any object; override to own taste.

        Note:
            Due to 'skip_frame' feature, INFO part of environment response transmitted by server can be  a list
            containing either all skipped frame's info objects, i.e. [info[-9], info[-8], ..., info[0]] or
            just latest one, [info[0]]. This behaviour is set inside btgym.server._BTgymAnalyzer().next() method.
        """
        info = {
            'step': self.iteration,
            'time': self.data.datetime.datetime(),
            'action': self.action,
            'broker_message': self.broker_message,
            'broker_cash': self.stats.broker.cash[0],
            'broker_value': self.stats.broker.value[0],
            'drawdown': self.stats.drawdown.drawdown[0],
            'max_drawdown': self.stats.drawdown.maxdrawdown[0],
        }
        return info

    def get_done(self):
        """
        Custom episode termination hook.
        Override to define any trading logic condition episode stop is called upon.
        Default implementation never requests termination.

        Returns:
            tuple (<is_done, type=bool>, <message, type=str>).
        """
        is_done, message = False, '-'
        return is_done, message

    def _get_done(self):
        """
        Default episode termination routine. Checks base stop conditions:
            1. Approaching the end of data (maximum episode duration).
            2. Maximum drawdown reached `drawdown_call` threshold.
            3. Custom get_done() method returned (True, 'something').

        When any condition is met, termination countdown is started: `close` order is issued
        on every call until `steps_till_is_done` counter is exhausted, then `is_done` flag is risen.

        This method shouldn't be overridden or called explicitly.

        Returns:
            current value of `self.is_done` flag.
        """
        if self.is_done_enabled:
            # Termination phase: count down and keep hitting `Close` button.
            self.steps_till_is_done -= 1
            self.broker_message = 'CLOSE, {}'.format(self.final_message)
            self.order = self.close()
            self.log.debug('Episode countdown contd. at: {}, {}, r:{}'.format(
                self.iteration, self.broker_message, self.reward))

        else:
            # Episode is running: evaluate base rules first, custom get_done() result last.
            last_allowed_step = \
                self.data.numrecords - self.inner_embedding - self.p.skip_frame - self.steps_till_is_done
            stop_rules = (
                (self.iteration >= last_allowed_step, 'END OF DATA'),
                (self.stats.drawdown.maxdrawdown[0] >= self.p.drawdown_call, 'DRAWDOWN CALL'),
                self.get_done(),
            )
            for triggered, reason in stop_rules:
                if not triggered:
                    continue
                # Start countdown for clean exit: final `close` order should be executed
                # and proper reward computed before `is_done` flag can be safely risen.
                self.is_done_enabled = True
                self.broker_message += reason
                self.final_message = reason
                self.order = self.close()
                self.log.debug(
                    'Episode countdown started at: {}, {}, r:{}'.format(
                        self.iteration, reason, self.reward))

        if self.steps_till_is_done <= 0:
            # Countdown is over, terminate:
            self.is_done = True

        return self.is_done

    def notify_order(self, order):
        """
        Order status callback: composes broker message for executed or failed orders.
        Adapted from backtrader tutorial.
        TODO: better multi data support

        Args:
            order:  order object providing `status` along with status constants, `executed` record,
                    `isbuy()` and `getstatusname()` methods.
        """
        status = order.status

        if status in (order.Submitted, order.Accepted):
            # Order is just submitted to / accepted by broker: nothing to report yet.
            return

        if status in (order.Completed,):
            # Attention: broker could reject order if not enough cash
            fill = order.executed
            if order.isbuy():
                self.broker_message = 'BUY executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'.format(
                    fill.price, fill.value, fill.comm)
                self.buyprice = fill.price
                self.buycomm = fill.comm

            else:
                self.broker_message = 'SELL executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'.format(
                    fill.price, fill.value, fill.comm)

            self.bar_executed = len(self)

        elif status in (order.Canceled, order.Margin, order.Rejected):
            self.broker_message = 'ORDER FAILED with status: ' + str(order.getstatusname())
            # Failed orders counter; original note says get_reward() is expected to use and reset it:
            self.order_failed += 1

        # Any non-pending status clears the stored order reference:
        self.order = None

    def _next_discrete(self, action):
        """
        Default action-to-order logic for discrete actions.
        Note that orders can be submitted only for data_lines in action_space (assets).
        No orders are issued for 'hold' action or when episode termination countdown is on.

        Args:
            action:     dict, string encoding of btgym.spaces.ActionDictSpace

        """
        for asset, command in action.items():
            if command == 'hold' or self.is_done_enabled:
                continue

            if command == 'buy':
                self.order = self.buy(data=asset, size=self.p.order_size[asset])
                note = 'new {}_BUY created; '
            elif command == 'sell':
                self.order = self.sell(data=asset, size=self.p.order_size[asset])
                note = 'new {}_SELL created; '
            elif command == 'close':
                self.order = self.close(data=asset)
                note = 'new {}_CLOSE created; '
            else:
                # Unrecognized actions are silently ignored:
                continue

            # Latest note goes first:
            self.broker_message = note.format(asset) + self.broker_message

        # Somewhere after this point, server-side _BTgymAnalyzer() is exchanging information with environment wrapper,
        # obtaining <self.action> , composing and sending <state,reward,done,info> etc... never mind.

    def _next_target_percent(self, action):
        """
        Rebalances assets to given ratios via `order_target_percent` method. Expects action for every asset to be
        a float scalar in [0,1], with actions sum to 1 over all assets (including base one).
        Note that action for base asset (cash) is ignored.
        For details refer to: https://www.backtrader.com/docu/order_target/order_target.html
        """
        # TODO 1: filter similar actions to prevent excessive orders issue e.g by DKL on two consecutive ones
        # TODO 2: actions discretisation on level of execution
        for name in self.p.asset_names:
            # Requested ratio is scaled down by factor 0.9 to keep a margin reserve, rounded to two decimals:
            target = round(float(action[name]) * 0.9, 2)
            self.order = self.order_target_percent(data=name, target=target)
            note = ' new {}->{:1.0f}% created; '.format(name, target * 100)
            self.broker_message += note

Exemple #2

0

Afficher le fichier

class PriceModel(TimeSeriesModel):
    """
    Wrapper class for positive-valued time-series.
    Internally works with normalised log-transformed data:
    incoming data is log-transformed, z-scored with tracked statistics and passed to base class methods;
    outgoing data is de-normalised and exponentiated back.
    """
    def __init__(self,
                 max_length,
                 analyzer_window,
                 analyzer_grouping=None,
                 alpha=None,
                 filter_alpha=None,
                 stat_alpha=None):
        """

        Args:
            max_length:         uint, maximum trajectory length to keep;
            analyzer_window:    uint, SSA embedding window;
            analyzer_grouping:  SSA decomposition triples grouping,
                                iterable of pairs convertible to python slices, i.e.:
                                grouping=[[0,1], [1,2], [2, None]];
            alpha:              float in [0, 1], SSA and process estimator decaying factor;
            filter_alpha:       float in [0, 1], process smoothing decaying factor;
            stat_alpha:         float in [0, 1], time-series statistics tracking decaying factor;
        """
        super().__init__(max_length, analyzer_window, analyzer_grouping, alpha,
                         filter_alpha)

        # Statistics of original [log-transformed] data:
        self.stat = Zscore(1, stat_alpha)

    def get_state(self):
        """
        Returns model state tuple.

        Returns:
            current state as instance of PriceModelState
        """
        return PriceModelState(
            process=self.process.get_state(),
            analyzer=self.analyzer.get_state(),
            stat=self.stat.get_state(),
        )

    @staticmethod
    def normalise(trajectory, mean, variance):
        """
        Z-scores data given mean and variance.
        Variance is floored at 1e-8 to prevent division by zero.
        """
        return (trajectory - mean) / np.clip(variance, 1e-8, None)**.5

    @staticmethod
    def denormalize(trajectory, mean, variance):
        """
        Inverts `normalise` transformation given the same mean and variance.
        Same 1e-8 variance floor is applied, so both methods stay exact inverses
        of each other for degenerate (near-zero variance) statistics as well.
        """
        return trajectory * np.clip(variance, 1e-8, None)**.5 + mean

    def reset(self, init_trajectory):
        """
        Resets model parameters and trajectory given initial data.

        Args:
            init_trajectory:    initial time-series observations of size from [1] to [num_points]
        """
        log_data = np.log(init_trajectory)
        # Statistics tracker takes data with leading channel dimension:
        mean, variance = self.stat.reset(log_data[None, :])
        return super().reset(self.normalise(log_data, mean, variance))

    def update(self, trajectory, disjoint=False):
        """
        Updates model parameters and trajectory given new data.

        Args:
            trajectory: time-series update observations of size from [1] to [num_points],
                        where num_points <= max_length to keep model trajectory continuous
            disjoint:   bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        log_data = np.log(trajectory)
        # Statistics tracker takes data with leading channel dimension:
        mean, variance = self.stat.update(log_data[None, :])
        return super().update(self.normalise(log_data, mean, variance),
                              disjoint)

    def transform(self, trajectory=None, state=None, size=None):
        """
        Returns analyzer data decomposition.

        Args:
            trajectory:     data to decompose of size [num_points] or None
            state:          instance of PriceModelState or None
            size:           uint, size of decomposition to get, or None

        Returns:
            SSA decomposition of given trajectory w.r.t. given state
            if no `trajectory` is given - returns stored data decomposition
            if no `state` arg. is given - uses stored analyzer state.
            if no 'size` arg is given - decomposes full [stored or given] trajectory

        Note:
            `state` is required whenever `trajectory` is given.
        """
        if state is not None:
            assert isinstance(state, PriceModelState), \
                'Expected `state` as instance of PriceModelState, got: {}'.format(type(state))
            # Unpack state:
            state_base = TimeSeriesModelState(analyzer=state.analyzer,
                                              process=state.process)
        else:
            state_base = None

        # If 1d signal is given - need to normalize it w.r.t. statistics of the state given:
        if trajectory is not None:
            assert state is not None, 'State is expected when trajectory is given'
            trajectory = self.normalise(np.log(trajectory), state.stat.mean,
                                        state.stat.variance)

        return super().transform(trajectory, state_base, size)

    def get_trajectory(self, size=None):
        """
        Returns stored fragment of original time-series data.

        Args:
            size:   uint, fragment length in [1, ..., max_length] or None

        Returns:
            1d series as [ x[-size], x[-size+1], ... x[-1] ], up to length [size];
            if no `size` arg. is given - returns entire stored trajectory, up to length [max_length].
        """
        # TODO: reconstruction is freaky due to only last stored statistic is used
        trajectory = super().get_trajectory(size)
        state = self.get_state()

        return np.exp(
            self.denormalize(trajectory, state.stat.mean, state.stat.variance))

    def generate(self, batch_size, size, state=None, driver_df=None):
        """
        Generates batch of realisations given process parameters.

        Args:
            batch_size:     uint, number of realisations to draw
            size:           uint, length of each one
            state:          instance PriceModelState or None, model parameters to use
            driver_df:      t-student process driver degree of freedom parameter or None

        Returns:
            process realisations batch of size [batch_size, size]
        """
        if state is not None:
            assert isinstance(state, PriceModelState), \
                'Expected `state` as instance of PriceModelState, got: {}'.format(type(state))
            # Unpack:
            state_base = TimeSeriesModelState(analyzer=state.analyzer,
                                              process=state.process)
        else:
            # Stored statistics are used for de-normalisation; base class falls back to its own stored state:
            state = self.get_state()
            state_base = None

        trajectory = super().generate(batch_size, size, state_base, driver_df)

        return np.exp(
            self.denormalize(trajectory, state.stat.mean, state.stat.variance))

    @staticmethod
    def get_random_state(p_params, mean=(100, 100), variance=(1, 1)):
        """
        Samples random uniform model state w.r.t. intervals given.

        Args:
            p_params:       dict, stochastic process parameters, see kwargs at: OUProcess.get_random_state
            mean:           iterable of floats as [0 < lower_bound, upper_bound], time-series means sampling interval.
            variance:       iterable of floats as [0 < lower_bound, upper_bound], time-series variances sampling interval.

        Returns:
            instance of PriceModelState with `analyser` set to None

        Note:
            negative means are rejected;
            stochastic process fitted on log_normalized data;
        """
        sample = dict()
        for name, param, low_threshold in zip([
                'mean',
                'variance',
        ], [mean, variance], [1e-8, 1e-8]):
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] >= low_threshold, \
                'Expected param `{}` lower bound be no less than {}, got: {}'.format(name, low_threshold, interval[0])

            sample[name] = np.random.uniform(low=interval[0],
                                             high=interval[-1],
                                             size=1)

        # Log_transform mean and variance (those is biased estimates but ok for rnd. samples):
        log_variance = np.log(sample['variance'] / sample['mean']**2 + 1)
        log_mean = np.log(sample['mean']) - .5 * log_variance

        # Inverse transform memo:
        # mean = exp(log_mean + 0.5 * log_var)
        # var = mean**2 * (exp(log_var) -1)

        return PriceModelState(
            process=OUProcess.get_random_state(**p_params),
            analyzer=None,
            stat=ZscoreState(mean=log_mean, variance=log_variance),
        )

Exemple #3

0

Afficher le fichier

class BivariateTSModel:
    """
    Two-factor bivariate time-series model.

    Motivating papers:
        Eduardo Schwartz, James E. Smith, "Short-Term Variations and Long-Term Dynamics in Commodity Prices",
        in "Management Science", Vol. 46, No. 7, July 2000 pp. 893–911

        Harris, D., "Principal components analysis of cointegrated time series," in "Econometric Theory", Vol. 13, 1997
    """
    # TODO: trajectory generator uses simplified algorithm: entire trajectory is generated out of single model state
    # TODO: proper state-space model approach
    # TODO: should be: sample [randomized?] trajectory of states -> sample realisation trajectory of same length
    # Decomposition matrix:
    u_decomp = np.asarray([[.5, .5], [.5, -.5]])

    # Reconstruction (inverse u_decomp):
    u_recon = np.asarray([[1., 1.], [1., -1.]])

    def __init__(
        self,
        max_length,
        analyzer_window,
        p_analyzer_grouping=None,
        s_analyzer_grouping=None,
        alpha=None,
        filter_alpha=None,
        stat_alpha=None,
        ps_alpha=None,
    ):
        """
        Sets up P and S component models and statistics trackers.
        Arguments `max_length`, `analyzer_window`, `alpha` and `filter_alpha` can be given either as scalars
        (shared by both components) or as sequences: first element is used for P and last one for S component.

        Args:
            max_length:             uint, maximum time-series trajectory length to keep;
            analyzer_window:        uint, SSA embedding window (shared for P and S analyzers);
            p_analyzer_grouping:    P process SSA decomposition triples grouping,
                                    iterable of pairs convertible to python slices, i.e.:
                                    grouping=[[0,1], [1,2], [2, None]];
            s_analyzer_grouping:    S process SSA decomposition triples grouping, see above;
            alpha:                  float in [0, 1], SSA and processes estimators decaying factor;
            filter_alpha:           float in [0, 1], processes smoothing decaying factor;
            stat_alpha:             float in [0, 1], time-series statistics tracking decaying factor;
            ps_alpha:               float in [0, 1], P|S processes covariance tracking decaying factor;
        """
        # Promote scalars to 1d arrays to pick per-component values uniformly:
        lengths = np.atleast_1d(max_length)
        windows = np.atleast_1d(analyzer_window)
        decays = np.atleast_1d(alpha)
        smoothing = np.atleast_1d(filter_alpha)

        first, last = 0, -1

        # Max. variance factor component (average):
        self.p = TimeSeriesModel(
            lengths[first],
            windows[first],
            p_analyzer_grouping,
            decays[first],
            smoothing[first],
        )

        # Max. stationarity factor component (difference):
        self.s = TimeSeriesModel(
            lengths[last],
            windows[last],
            s_analyzer_grouping,
            decays[last],
            smoothing[last],
        )

        # Statistics of original data:
        self.stat = Zscore(2, stat_alpha)

        # Stochastic processes covariance:
        self.ps_stat = Covariance(2, ps_alpha)

    def ready(self):
        return self.s.ready() and self.p.ready()

    def get_state(self):
        """
        Collects current states of P and S component models and both statistics trackers.

        Returns:
            instance of BivariateTSModelState
        """
        p_state = self.p.get_state()
        s_state = self.s.get_state()
        stat_state = self.stat.get_state()
        ps_stat_state = self.ps_stat.get_state()

        return BivariateTSModelState(
            p=p_state,
            s=s_state,
            stat=stat_state,
            ps_stat=ps_stat_state,
        )

    @staticmethod
    def get_random_state(p_params,
                         s_params,
                         mean=(100, 100),
                         variance=(1, 1),
                         ps_corrcoef=(-1, 1)):
        """
        Samples random uniform model state w.r.t. parameters intervals given.

        Args:
            p_params:       dict, P stochastic process parameters, see kwargs at: OUProcess.get_random_state
            s_params:       dict, S stochastic process parameters, see kwargs at: OUProcess.get_random_state
            mean:           iterable of floats as [lower_bound, upper_bound], time-series means sampling interval.
            variance:       iterable of floats as [lower_bound, upper_bound], time-series variances sampling interval.
            ps_corrcoef:    iterable of floats as [lower_bound, upper_bound], correlation coefficient
                            for P and S process innovations, -1 <= ps_corrcoef <= 1

        Returns:
            instance of BivariateTSModelState

        Note:
            negative means are allowed.
        """
        sample = dict()
        for name, param, low_threshold in zip(
            ['mean', 'variance', 'ps_corrcoef'], [mean, variance, ps_corrcoef],
            [-np.inf, 1e-8, -1.0]):
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] >= low_threshold, \
                'Expected param `{}` lower bound be no less than {}, got: {}'.format(name, low_threshold, interval[0])

            sample[name] = np.random.uniform(low=interval[0],
                                             high=interval[-1],
                                             size=2)

        # Correlation matrix instead of covariance - it is ok as it gets normalized when sampling anyway:
        rho = np.eye(2)
        rho[0, 1] = rho[1, 0] = sample['ps_corrcoef'][0]

        return BivariateTSModelState(
            p=TimeSeriesModel.get_random_state(**p_params),
            s=TimeSeriesModel.get_random_state(**s_params),
            stat=ZscoreState(mean=sample['mean'], variance=sample['variance']),
            ps_stat=CovarianceState(
                mean=np.zeros(2),
                variance=np.ones(2),
                covariance=rho,
            ),
        )

    @staticmethod
    def _decompose(trajectory, mean, variance, u):
        """
        Returns orthonormal decomposition of pair [X1, X2].
        Static method, can be used as stand-along function.

        Args:
            trajectory: time-series data of shape [2, num_points]
            mean:       data mean of size [2]
            variance:   data variance of size [2]
            u:          [2, 2] decomposition matrix

        Returns:
            data projection of size [2, num_pints], where first (P) component is average and second (S) is difference
            of original time-series.
        """
        assert len(trajectory.shape) == 2 and trajectory.shape[0] == 2, \
            'Expected data as array of size [2, num_points], got: {}'.format(trajectory.shape)

        assert mean.shape == (2,) and variance.shape == (2,), \
            'Expected mean and variance as vectors of size [2], got: {}, {}'.format(mean.shape, variance.shape)

        assert u.shape == (2, 2), 'Expected U as 2x2 matrix, got: {}'.format(
            u.shape)

        # Z-score data:
        norm_data = (trajectory - mean[:, None]) / np.clip(
            variance[:, None], 1e-8, None)**.5
        ps_decomposition = np.matmul(u, norm_data)

        return ps_decomposition

    @staticmethod
    def _reconstruct(ps_decomposition, mean, variance, u):
        """
        Returns original data [X1, X2] given orthonormal P|S decomposition .
        Static method, can be used as stand-along function.

        Args:
            ps_decomposition:   data ps-decomposition of size [2, num_points]
            mean:               original data mean of size [2]
            variance:           original data variance of size [2]
            u:                  [2, 2] reconstruction matrix

        Returns:
            reconstructed data of size [2, num_pints]
        """
        assert len(ps_decomposition.shape) == 2 and ps_decomposition.shape[0] == 2, \
            'Expected data as array of size [2, num_points], got: {}'.format(ps_decomposition.shape)

        assert mean.shape == (2,) and variance.shape == (2,), \
            'Expected mean and variance as vectors of size [2], got: {}, {}'.format(mean.shape, variance.shape)

        assert u.shape == (2, 2), 'Expected U as 2x2 matrix, got: {}'.format(
            u.shape)

        return np.matmul(
            u, ps_decomposition) * variance[:, None]**.5 + mean[:, None]

    def decompose(self, trajectory):
        """
        Returns orthonormal decomposition of pair [X1, X2] w.r.t current statistics.

        Args:
            trajectory: time-series data of shape [2, num_points]

        Returns:
            tuple (P, S), where first (P) component is average and second (S) is difference
            of original time-series, of size [num_points] each
        """
        ps_decomp = self._decompose(trajectory, self.stat.mean,
                                    self.stat.variance, self.u_decomp)
        return ps_decomp[0, :], ps_decomp[1, :]

    def reconstruct(self, p, s, mean=None, variance=None):
        """
        Returns original data [X1, X2] given orthonormal P|S decomposition.

        Args:
            p:          data p-component of shape [num_points]
            s:          data s-component of shape [num_points]
            mean:       original data mean of size [2] or None
            variance:   original data variance of size [2] or None

        Returns:
            reconstructed data of size [2, num_pints]

        Notes:
            if either mean or variance arg is not given - stored mean and variance are used.
        """
        assert p.shape == s.shape, ' Expected components be same size but got: {} and {}'.format(
            p.shape, s.shape)

        if mean is None or variance is None:
            mean = self.stat.mean
            variance = self.stat.variance

        ps = np.stack([p, s], axis=0)
        return self._reconstruct(ps, mean, variance, self.u_recon)

    def reset(self, init_trajectory):
        """
        Resets model parameters and trajectories given initial data.

        Args:
            init_trajectory:    initial time-series observations of size [2, num_points]
        """
        _ = self.stat.reset(init_trajectory)
        p_data, s_data = self.decompose(init_trajectory)
        self.p.reset(p_data)
        self.s.reset(s_data)
        residuals = np.stack([
            self.p.process.estimator.residuals,
            self.s.process.estimator.residuals
        ],
                             axis=0)
        _ = self.ps_stat.reset(residuals)

    def update(self, trajectory, disjoint=False):
        """
        Updates model parameters and trajectories given new data.

        Args:
            trajectory: time-series update observations of size [2, num_points], where:
                        num_points <= min{p_params[max_length], s_params[max_length]} is necessary
                        to keep model trajectory continuous
            disjoint:   bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        _ = self.stat.update(
            trajectory
        )  # todo: this stat.estimator does not respect `disjoint` arg.; ?!!
        p_data, s_data = self.decompose(trajectory)
        self.p.update(p_data, disjoint)
        self.s.update(s_data, disjoint)
        residuals = np.stack([
            self.p.process.estimator.residuals,
            self.s.process.estimator.residuals
        ],
                             axis=0)
        _ = self.ps_stat.update(residuals)

    def transform(self, trajectory=None, state=None, size=None):
        """
        Returns per-component analyzer data decomposition.

        Args:
            trajectory:     bivariate data to decompose of size [2, num_points] or None
            state:          instance of BivariateTSModelState or None
            size:           uint, size of decomposition to get, or None

        Returns:
            array of [size or num_points], array of [size or num_points], ZscoreState(2)

            - SSA transformations of P, S components of given trajectory w.r.t. given state
            - bivariate trajectory statistics (means and variances)

        Notes:
            if no `trajectory` is given - returns stored data decomposition
            if no `state` arg. is given - uses stored analyzer state.
            if no 'size` arg is given - decomposes full [stored or given] trajectory
        """
        if state is not None:
            assert isinstance(state, BivariateTSModelState),\
                'Expected `state as instance of BivariateTSModelState but got: {}`'.format(type(state))
            s_state = state.s
            p_state = state.p
            stat = state.stat

        else:
            assert trajectory is None, 'When `trajectory` arg. is given, `state` is required'
            p_state = None
            s_state = None
            stat = self.stat.get_state()

        if trajectory is not None:
            ps_data = self._decompose(trajectory, stat.mean, stat.variance,
                                      self.u_decomp)
            p_data = ps_data[0, :]
            s_data = ps_data[1, :]

        else:
            p_data = None
            s_data = None

        p_transformed = self.p.transform(p_data, p_state, size)
        s_transformed = self.s.transform(s_data, s_state, size)

        return p_transformed, s_transformed, stat

    def get_trajectory(self, size=None, reconstruct=True):
        """
        Returns stored decomposition fragment and [optionally] time-series reconstruction.
        TODO: reconstruction is freaky due to only last stored statistic is used

        Args:
            size:           uint, fragment length to get in [1, ..., max_length] or None
            reconstruct:    bool, if True - also return data reconstruction

        Returns:
            array of [size ... max_length], array of [size ... max_length], array of size [2, size ... max_length]
            or
            array of [size ... max_length], array of [size ... max_length], None

            P,C [, and 2D trajectory] series as [ x[-size], x[-size+1], ... x[-1] ], up to length [size];
            if no `size` arg. is given - returns entire stored trajectory, up to length [max_length].

        """
        p_data = self.p.get_trajectory(size)
        s_data = self.s.get_trajectory(size)

        if reconstruct:
            trajectory = self.reconstruct(p_data, s_data)

        else:
            trajectory = None

        return p_data, s_data, trajectory

    @staticmethod
    def generate_trajectory_fn(batch_size,
                               size,
                               state,
                               reconstruct=False,
                               u_recon=None):
        """
        Draws a batch of P and S realisations with the univariate generator (the P/S innovations
        covariance is not used here) and optionally maps them back to time-series space.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState;
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise
            u_recon:        reconstruction matrix of size [2, 2] or None; required if reconstruct=True;

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        assert isinstance(state, BivariateTSModelState), \
            'Expected `state` as instance of BivariateTSModelState, got: {}'.format(type(state))

        if reconstruct:
            assert u_recon is not None, 'reconstruct=True but reconstruction matrix is not provided.'

        # Component process states, P first - this order fixes the layout of everything below:
        components = (state.p.process, state.s.process)

        # One parameter set per generated trajectory, drawn per component in a single call:
        draws = [OUProcess.sample_naive_unbiased(component, batch_size) for component in components]

        # Stack P-block on top of S-block to run both through a single univariate batch of 2 * batch_size:
        parameters = OUEstimatorState(
            mu=np.concatenate([draw.mu for draw in draws]),
            log_theta=np.concatenate([draw.log_theta for draw in draws]),
            log_sigma=np.concatenate([draw.log_sigma for draw in draws]),
        )
        driver_df = np.concatenate([np.tile(component.driver_df, batch_size) for component in components])

        stacked = OUProcess.generate_trajectory_fn(2 * batch_size, size, parameters, driver_df)

        # Split stacked rows back into [2, batch_size, size], then put batch axis first:
        batch_2x = np.swapaxes(np.reshape(stacked, [2, batch_size, -1]), 0, 1)

        if not reconstruct:
            return batch_2x, None

        # Undo decomposition, then rescale and shift with stored statistics:
        x = np.matmul(u_recon, batch_2x) * state.stat.variance[None, :, None] ** .5 \
            + state.stat.mean[None, :, None]

        return batch_2x, x

    @staticmethod
    def generate_bivariate_trajectory_fn(batch_size,
                                         size,
                                         state,
                                         reconstruct=False,
                                         u_recon=None):
        """
        Draws a batch of jointly generated P and S realisations using the multivariate generator
        with `state.ps_stat.covariance`, and optionally maps them back to time-series space.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState;
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise
            u_recon:        reconstruction matrix of size [2, 2] or None; required if reconstruct=True;

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        assert isinstance(state, BivariateTSModelState), \
            'Expected `state` as instance of BivariateTSModelState, got: {}'.format(type(state))

        if reconstruct:
            assert u_recon is not None, 'reconstruct=True but reconstruction matrix is not provided.'

        # Component process states, P first:
        components = (state.p.process, state.s.process)

        # A single parameter draw per component: the whole batch shares one parameter set,
        # so every parameter vector below has size [2] (process dimensionality):
        draws = [OUProcess.sample_naive_unbiased(component, 1) for component in components]

        parameters = OUEstimatorState(
            mu=np.concatenate([draw.mu for draw in draws]),
            log_theta=np.concatenate([draw.log_theta for draw in draws]),
            log_sigma=np.concatenate([draw.log_sigma for draw in draws]),
        )
        driver_df = np.asarray([component.driver_df for component in components])

        # Multivariate generator yields [batch_size, size, 2]; move process axis before time axis:
        raw_batch = OUProcess.generate_multivariate_trajectory_fn(
            batch_size=batch_size,
            size=size,
            parameters=parameters,
            t_df=driver_df,
            covariance=state.ps_stat.covariance)
        batch_2d = np.swapaxes(raw_batch, 1, 2)

        if not reconstruct:
            return batch_2d, None

        # Undo decomposition, then rescale and shift with stored statistics:
        x = np.matmul(u_recon, batch_2d) * state.stat.variance[None, :, None] ** .5 \
            + state.stat.mean[None, :, None]

        return batch_2d, x

    def generate(self, batch_size, size, state=None, reconstruct=True):
        """
        Generates batch of time-series realisations given model state,
        delegating to `generate_bivariate_trajectory_fn()` with this model's `u_recon`.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            state:          instance of BivariateTSModelState or None;
                            if no state provided - driver estimators of both components are fitted
                            and current state is used.
            reconstruct:    bool, if True - return time-series along with P, S trajectories, return None otherwise

        Returns:
            generated P and S processes realisations of size [batch_size, 2, size];
            generated time-series reconstructions of size [batch_size, 2, size] or None;
        """
        if state is None:
            # Fit student-t df of each component (P first) before taking the state snapshot:
            for component in (self.p, self.s):
                component.process.driver_estimator.fit()

            state = self.get_state()

        # Jointly generated P, S are used here; `generate_trajectory_fn` takes the same arguments.
        return self.generate_bivariate_trajectory_fn(
            batch_size, size, state, reconstruct, self.u_recon)

Exemple #4

0

Afficher le fichier

Fichier : univariate.py Projet : war3gu/btgym

class OUProcess:
    """
    Provides essential functionality for recursive time series modeling
    as Ornstein-Uhlenbeck stochastic process:
    parameters estimation, state filtering and sampling, trajectories generation.
    """
    def __init__(self, alpha=None, filter_alpha=None):
        """
        Creates parameter estimator, state filter, driver estimator and statistics tracker.
        Instance is flagged as not ready until .reset() is called.

        Args:
            alpha:          passed to OUEstimator, STEstimator and Zscore
            filter_alpha:   passed to the Covariance filter
        """
        # Checked by .ready(), switched on by .reset():
        self.is_ready = False

        self.alpha, self.filter_alpha = alpha, filter_alpha

        # Process parameters estimator:
        self.estimator = OUEstimator(alpha)

        # State-space trajectory filter is plain exponential smoothing over 3 values:
        self.filter = Covariance(3, alpha=filter_alpha)

        # Student-t driver estimator:
        self.driver_estimator = STEstimator(alpha)

        # Empirical statistics tracker (mostly for accuracy checking, not included in OUProcessState):
        self.stat = Zscore(1, alpha)

    def ready(self):
        assert self.is_ready, 'OUProcess is not initialized. Hint: forgot to call .reset()?'

    def get_state(self):
        """
        Packs current estimates into model state tuple.

        Returns:
            instance of OUProcessState holding estimator state as `observation`,
            filter state as `filtered` and driver estimator `df` attribute as `driver_df`

        Raises:
            AssertionError: via .ready() if process is not initialised
        """
        self.ready()

        observed = self.estimator.get_state()
        smoothed = self.filter.get_state()

        return OUProcessState(
            observation=observed,
            filtered=smoothed,
            driver_df=self.driver_estimator.df,
        )

    @staticmethod
    def get_random_state(mu=(0, 0),
                         theta=(.1, 1),
                         sigma=(0.1, 1),
                         driver_df=(3, 50),
                         variance=1e-2):
        """
        Samples random uniform process state w.r.t. parameters intervals given.

        Args:
            mu:         iterable of floats as [lower_bound, upper_bound], OU Mu sampling interval
            theta:      iterable of positive floats as [lower_bound, upper_bound], OU Theta sampling interval
            sigma:      iterable of positive floats as [lower_bound, upper_bound], OU Sigma sampling interval
            driver_df:  iterable of positive floats as [lower_bound > 2, upper_bound],
                        student-t driver degrees of freedom sampling interval
            variance:   filtered observation variance (same fixed for all params., covariance assumed diagonal)

        Returns:
            instance of OUProcessState
        """
        sample = dict()
        for name, param, low_threshold in zip(
            ['mu', 'theta', 'sigma', 'driver_df'],
            [mu, theta, sigma, driver_df], [-np.inf, 1e-8, 1e-8, 2.999]):
            interval = np.asarray(param)
            assert interval.ndim == 1 and interval[0] <= interval[-1], \
                ' Expected param `{}` as iterable of ordered values as: [lower_bound, upper_bound], got: {}'.format(
                    name, interval
                )
            assert interval[0] > low_threshold, \
                'Expected param `{}` lower bound be bigger than {}, got: {}'.format(name, low_threshold, interval[0])
            sample[name] = np.random.uniform(low=interval[0],
                                             high=interval[-1])

        observation = OUEstimatorState(mu=sample['mu'],
                                       log_theta=np.log(sample['theta']),
                                       log_sigma=np.log(sample['sigma']))
        filtered = CovarianceState(
            mean=np.asarray(observation),
            variance=np.ones(3) * variance,
            covariance=np.eye(3) * variance,
        )
        return OUProcessState(
            observation=observation,
            filtered=filtered,
            driver_df=sample['driver_df'],
        )

    def fit_driver(self, trajectory=None):
        """
        Updates Student-t driver shape parameter. Needs entire trajectory for correct estimation.
        TODO: make recursive update.

        Args:
            trajectory: full observed data of size ~[max_length] or None

        Returns:
            Estimated shape parameter.
        """
        self.ready()
        driver_df, _, _ = self.driver_estimator.fit(trajectory)

        return driver_df

    def reset(self, init_trajectory):
        """
        Resets model parameters for process dX = -Theta *(X - Mu) + Sigma * dW
        and starts new trajectory given initial data.

        Args:
            init_trajectory:    initial 1D process observations trajectory of size [num_points]
        """
        _ = self.stat.reset(init_trajectory[None, :])

        init_observation = np.asarray(self.estimator.reset(init_trajectory))
        # 2x observation to get initial covariance matrix estimate:
        init_observation = np.stack([init_observation, init_observation],
                                    axis=-1)

        _ = self.filter.reset(init_observation)

        self.driver_estimator.reset(self.estimator.residuals)
        self.is_ready = True

    def update(self, trajectory, disjoint=False):
        """
        Updates model parameters estimates for process dX = -Theta *(X - Mu) + Sigma * dW
        given new observations.

        Args:
            trajectory:  1D process observations trajectory update of size [num_points]
            disjoint:    bool, indicates whether update given is continuous or disjoint w.r.t. previous one
        """
        self.ready()
        _ = self.stat.update(
            trajectory[None, :])  # todo: disjoint is ignored or reset stat?

        # Get new state-space observation:
        observation = self.estimator.update(trajectory, disjoint)

        # Smooth and make it random variable:
        _ = self.filter.update(np.asarray(observation)[:, None])

        # Residuals distr. shape update but do not fit:
        self.driver_estimator.update(self.estimator.residuals)

    @staticmethod
    def sample_from_filtered(filter_state, size=1):
        """
        Samples process parameters values given smoothed observations:
        draws are normal with `filter_state.mean` and `filter_state.covariance`.
        Static method, can be used as stand-alone function.

        Args:
            filter_state:  instance of CovarianceState of dimensionality 3
            size:          int or None, number of samples to draw;
                           if None - a single draw is made and every parameter comes
                           without leading sample axis

        Returns:
            sampled process parameters of size [size] each, packed as OUEstimatorState tuple

        """
        assert isinstance(filter_state, CovarianceState),\
            'Expected filter_state as instance of CovarianceState, got: {}'.format(type(filter_state))

        sample = np.random.multivariate_normal(filter_state.mean,
                                               filter_state.covariance,
                                               size=size)

        # Parameters sit on the last axis. Indexing it via Ellipsis (instead of `[:, k]`)
        # gives identical results for integer `size` and also works for the 1D array
        # returned when size=None, which previously raised IndexError.
        return OUEstimatorState(
            mu=sample[..., 0],
            log_theta=sample[..., 1],
            log_sigma=sample[..., 2],
        )

    @staticmethod
    def sample_naive_unbiased(state, size=1):
        """
        Samples process parameters values given observed values and smoothed covariance:
        draws are normal, centered at `state.observation` with `state.filtered.covariance`.
        Static method, can be used as stand-alone function.

        Args:
            state:  instance of OUProcessState
            size:   int or None, number of samples to draw;
                    if None - a single draw is made and every parameter comes
                    without leading sample axis

        Returns:
            sampled process parameters of size [size] each, packed as OUEstimatorState tuple

        """
        # Message names the actual argument (`state`); it used to say `filter_state`:
        assert isinstance(state, OUProcessState), \
            'Expected state as instance of `OUProcessState`, got: {}'.format(type(state))

        # Mean is the raw observation, not the filtered mean:
        naive_mean = np.asarray(state.observation)
        sample = np.random.multivariate_normal(naive_mean,
                                               state.filtered.covariance,
                                               size=size)

        # Parameters sit on the last axis. Indexing it via Ellipsis (instead of `[:, k]`)
        # gives identical results for integer `size` and also works for the 1D array
        # returned when size=None, which previously raised IndexError.
        return OUEstimatorState(
            mu=sample[..., 0],
            log_theta=sample[..., 1],
            log_sigma=sample[..., 2],
        )

    def sample_parameters(self, state=None, size=1):
        """
        Samples process parameters values given process state;

        Args:
            state:  instance of OUProcessState or None;
                    if no state provided - current state is used;
            size:   number of samples to draw;

        Returns:
            sampled process parameters of size [size] each, packed as OUEstimatorState tuple
        """
        if state is None:
            state = self.get_state()

        else:
            assert isinstance(state, OUProcessState),\
                'Expected state as instance of OUProcessState, got: {}'.format(type(state))

        # return self.sample_from_filtered(state.filtered, size=size)
        return self.sample_naive_unbiased(state, size=size)

    @staticmethod
    def generate_trajectory_fn(batch_size, size, parameters, t_df):
        """
        Generates batch of univariate process realisations given process parameters.
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            parameters:     instance of OUEstimatorState of size [batch_size] for each parameter
            t_df:           float > 3.0, driver shape param.; either a scalar (Python or NumPy number,
                            shared by entire batch) or array_like of size [batch_size]

        Returns:
            process realisations as 2d array of size [batch_size, size]

        Raises:
            AssertionError: if `parameters` is of wrong type or any parameter / `t_df` length
                            does not match `batch_size`
        """
        assert isinstance(parameters, OUEstimatorState), \
            'Expected `parameters` as instance of OUEstimatorState, got: {}'.format(type(parameters))

        for param in parameters:
            assert param.shape[0] == batch_size,\
                'Given `parameters` length: {} and `batch_size`: {} does not match.'.format(param.shape[0], batch_size)

        # Scalars are detected by dimensionality rather than by `isinstance(t_df, (float, int))`:
        # NumPy scalars like np.float32 / np.int64 and 0-d arrays are not Python floats or ints
        # and used to fall through to the `.shape[0]` check below and crash there.
        if np.ndim(t_df) == 0:
            t_df = np.tile(t_df, batch_size)

        else:
            # No-op for arrays; lets plain sequences pass the length check as well:
            t_df = np.asarray(t_df)
            assert t_df.shape[0] == batch_size, \
                'Given `t_df` parameters length: {} and `batch_size`: {} does not match.'.format(t_df.shape[0], batch_size)

        # Every trajectory starts at its own Mu; Theta and Sigma are stored as logarithms:
        trajectory = ou_process_t_driver_batch_fn(
            size,
            mu=parameters.mu,
            l=np.exp(parameters.log_theta),
            sigma=np.exp(parameters.log_sigma),
            df=t_df,
            x0=parameters.mu,
        )
        # Transposed to match the documented [batch_size, size] layout:
        return trajectory.T

    @staticmethod
    def generate_multivariate_trajectory_fn(batch_size, size, parameters, t_df,
                                            covariance):
        """
        Generates batch of realisations of multivariate Ornstein-Uhlenbeck process.
        Note differences in parameters dimensionality w.r.t. univariate case!
        Static method, can be used as stand-alone function.

        Args:
            batch_size:     uint, number of trajectories to generate
            size:           uint, trajectory length to generate
            parameters:     instance of OUEstimatorState of size [process_dim] for each parameter
            t_df:           array_like, driver shape param. vector of size [process_dim]
            covariance:     process innovations covariance matrix of size [process_dim, process_dim]

        Returns:
            process realisations as array of size [batch_size, size, process_dim]
        """
        assert isinstance(parameters, OUEstimatorState), \
            'Expected `parameters` as instance of OUEstimatorState, got: {}'.format(type(parameters))

        # Theta and Sigma are stored as logarithms:
        theta = np.exp(parameters.log_theta)
        sigma = np.exp(parameters.log_sigma)

        # Process starts at its Mu; generator output is returned as is:
        return multivariate_ou_process_t_driver_batch_fn(
            batch_size=batch_size,
            num_points=size,
            mu=parameters.mu,
            theta=theta,
            sigma=sigma,
            cov=covariance,
            df=t_df,
            x0=parameters.mu,
        )

    def generate(self, batch_size, size, state=None, driver_df=None):
        """
        Generates batch of realisations given process state.

        Args:
            batch_size:     uint, number of trajectories to generates
            size:           uint, trajectory length to generate
            state:          instance of OUProcessState or None;
                            if no state provided - current state is used.
            driver_df:      t-student process driver degree of freedom parameter or None;
                            if no value provided - current value is used;

        Returns:
            process realisations of size [batch_size, size]

        """
        self.ready()
        parameters = self.sample_parameters(state, size=batch_size)

        if driver_df is None:
            driver_df, _, _ = self.driver_estimator.fit()
        print('driver_df: ', driver_df)
        return self.generate_trajectory_fn(batch_size, size, parameters,
                                           driver_df)