Example #1
    def __init__(self, env_id=None):
        """

        Args:
            env_id:     conventional Gym id.
        """
        assert "." not in env_id  # universe environments have dots in names.
        env = gym.make(env_id)
        super(AtariRescale42x42, self).__init__(env)
        self.observation_space = DictSpace(
            {'external': spaces.Box(0.0, 1.0, [42, 42, 1])})
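The wrapper above only declares the 42x42x1 observation Box; the actual frame preprocessing is not part of this snippet. Below is a minimal sketch of what such a rescale step usually looks like, assuming OpenCV is available (the helper name `_process_frame42` and the crop bounds are illustrative, not taken from the source):

import cv2
import numpy as np

def _process_frame42(frame):
    # Hypothetical helper: downsample an RGB Atari frame to 42x42x1 in [0, 1].
    frame = frame[34:34 + 160, :160]            # crop the playing field (assumed bounds)
    frame = cv2.resize(frame, (80, 80))         # intermediate downsample
    frame = cv2.resize(frame, (42, 42))         # target resolution
    frame = frame.mean(2, keepdims=True)        # RGB -> single luminance channel
    return frame.astype(np.float32) / 255.0     # scale into [0, 1] to match the Box bounds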
Example #2
class PairSpreadStrategy_0(BaseStrategy6):
    """
    Expects a pair of data streams. Forms the spread as the only virtual trading asset.
    """

    # Time embedding period:
    time_dim = 128  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 64

    # Possible agent actions; note: place 'hold' first:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = (1, 4, 16, 64, 256, 1024)
    num_features = len(features_parameters)

    params = dict(
        state_shape={
            'external':
            spaces.Box(low=-10,
                       high=10,
                       shape=(time_dim, 1, num_features * 2),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-100,
                       high=100,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'expert':
            spaces.Box(low=0,
                       high=10,
                       shape=(len(portfolio_actions), ),
                       dtype=np.float32),
            'stat':
            spaces.Box(low=-100, high=100, shape=(3, 1), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
                # TODO: make generator parameters names standard
                'generator':
                DictSpace({
                    'mu':
                    spaces.Box(shape=(),
                               low=np.finfo(np.float64).min,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'l':
                    spaces.Box(shape=(),
                               low=0,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'sigma':
                    spaces.Box(shape=(),
                               low=0,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'x0':
                    spaces.Box(shape=(),
                               low=np.finfo(np.float64).min,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64)
                })
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=1.0,  # fi_gamma, ~ should match MDP gamma decay
        reward_scale=1,  # reward multiplicator
        norm_alpha=0.001,  # renormalisation tracking decay in (0, 1]
        norm_alpha_2=0.01,  # float in (0, 1], tracking decay for original prices
        drawdown_call=10,  # finish episode when hitting drawdown threshold, in percent
        dataset_stat=None,  # summary descriptive statistics for the entire dataset and
        episode_stat=None,  # the current episode; updated by the server
        time_dim=time_dim,  # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        position_max_depth=1,
        order_size=1,  # legacy plug, to be removed <-- rework gen_6.__init__
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        super(PairSpreadStrategy_0, self).__init__(**kwargs)

        assert len(self.p.asset_names) == 1, 'Only one derivative spread asset is supported'
        assert len(self.getdatanames()) == 2, \
            'Expected exactly two input datalines but {} were given'.format(self.getdatanames())

        if isinstance(self.p.asset_names, str):
            self.p.asset_names = [self.p.asset_names]
        self.action_key = list(self.p.asset_names)[0]

        self.current_expert_action = np.zeros(len(self.p.portfolio_actions))
        self.state['metadata'] = self.metadata

        # Infer OU generator params:
        generator_keys = self.p.state_shape['metadata'].spaces['generator'].spaces.keys()
        if 'generator' not in self.p.metadata.keys() or self.p.metadata['generator'] == {}:
            self.metadata['generator'] = {key: np.asarray(0) for key in generator_keys}

        else:
            # self.metadata['generator'] = {key: self.p.metadata['generator'][key] for key in generator_keys}

            # TODO: clean up this mess, refine names:

            self.metadata['generator'] = {
                'l': self.p.metadata['generator']['ou_lambda'],
                'mu': self.p.metadata['generator']['ou_mu'],
                'sigma': self.p.metadata['generator']['ou_sigma'],
                'x0': 0,
            }

            # Make scalars np arrays to comply with gym.spaces.Box specs:
            for k, v in self.metadata['generator'].items():
                self.metadata['generator'][k] = np.asarray(v)

        self.last_delta_total_pnl = 0
        self.last_pnl = 0

        self.log.debug('strategy got broadcast_msg: <<{}>>'.format(
            self.p.broadcast_message))

        # Track original prices statistics, let base self.norm_stat_tracker track spread (=stat_asset) itself:
        self.norm_stat_tracker_2 = Zscore(2, self.p.norm_alpha_2)

        # Synthetic spread order size estimator:
        self.spread_sizer = SpreadSizer(
            init_cash=self.p.start_cash,
            position_max_depth=self.p.position_max_depth,
            leverage=self.p.leverage,
            margin_reserve=self.margin_reserve,
        )
        self.last_action = None

        # Keeps track of the virtual spread position:
        # long spread: > 0, short spread: < 0, no position: 0
        self.spread_position_size = 0

        # Reward signal filtering:
        self.kf = KalmanFilter(initial_state_mean=0,
                               transition_covariance=.01,
                               observation_covariance=1,
                               n_dim_obs=1)
        self.kf_state = [0, 0]

    def set_datalines(self):
        # Override stat line:
        self.stat_asset = self.data.spread = SpreadConstructor()

        # Spy on reward behaviour:
        self.reward_tracker = CumSumReward()

        self.data.std = btind.StdDev(self.data.spread,
                                     period=self.p.time_dim,
                                     safepow=True)
        self.data.std.plotinfo.plot = False

        self.data.features = [
            btind.EMA(self.data.spread, period=period)
            for period in self.p.features_parameters
        ]
        initial_time_period = np.asarray(
            self.p.features_parameters).max() + self.p.time_dim
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0], period=initial_time_period)
        self.data.dim_sma.plotinfo.plot = False

    def get_broadcast_message(self):
        """
        Not used.
        """
        return {
            'data_model_psi': np.zeros([2, 3]),
            'iteration': self.iteration
        }

    def get_expert_state(self):
        """
        Not used.
        """
        return np.zeros(len(self.p.portfolio_actions))

    def prenext(self):
        if self.pre_iteration + 2 > self.p.time_dim - self.avg_period:
            self.update_broker_stat()
            x_upd = np.stack([
                np.asarray(self.datas[0].get(size=1)),
                np.asarray(self.datas[1].get(size=1))
            ],
                             axis=0)
            # Duplicates work done in update_broker_stat() but helps faster stabilization:
            _ = self.norm_stat_tracker_2.update(x_upd)

        elif self.pre_iteration + 2 == self.p.time_dim - self.avg_period:
            # Initialize all trackers:
            x_init = np.stack([
                np.asarray(self.datas[0].get(size=self.data.close.buflen())),
                np.asarray(self.datas[1].get(size=self.data.close.buflen()))
            ],
                              axis=0)
            _ = self.norm_stat_tracker_2.reset(x_init)
            _ = self.norm_stat_tracker.reset(
                np.asarray(self.stat_asset.get(
                    size=self.data.close.buflen()))[None, :])
            # _ = self.norm_stat_tracker.reset(np.asarray(self.stat_asset.get(size=1))[None, :])

        self.pre_iteration += 1

    def nextstart(self):
        self.inner_embedding = self.data.close.buflen()
        self.log.debug('Inner time embedding: {}'.format(self.inner_embedding))

    def get_normalisation(self):
        """
        Estimates current normalisation constants, updates `normalisation_state` attr.

        Returns:
            instance of NormalisationState tuple
        """
        # Update synth. spread rolling normalizers:
        x_upd = np.stack([
            np.asarray(self.datas[0].get(size=1)),
            np.asarray(self.datas[1].get(size=1))
        ],
                         axis=0)
        _ = self.norm_stat_tracker_2.update(x_upd)

        # ...and use [normalised] spread rolling mean and variance to estimate NormalisationState
        # used to normalize all broker statistics and reward:
        spread_data = np.asarray(self.stat_asset.get(size=1))

        mean, var = self.norm_stat_tracker.update(spread_data[None, :])
        var = np.clip(var, 1e-8, None)

        # Use 99% N(stat_data_mean, stat_data_std) intervals as normalisation interval:
        intervals = stats.norm.interval(.99, mean, var**.5)
        self.normalisation_state = NormalisationState(
            mean=float(mean),
            variance=float(var),
            low_interval=intervals[0][0],
            up_interval=intervals[1][0])
        return self.normalisation_state

    def get_stat_state(self):
        return np.concatenate([
            np.asarray(self.norm_stat_tracker.get_state()),
            np.asarray(self.stat_asset.get())[None, :]
        ],
                              axis=0)

    def get_external_state(self):
        """
        Attempt to include avg decomp. of original normalised spread
        """
        x_sma = np.stack([
            feature.get(size=self.p.time_dim) for feature in self.data.features
        ],
                         axis=-1)
        scale = 1 / np.clip(self.data.std[0], 1e-10, None)
        x_sma *= scale  # <-- more or less ok

        # Gradient along features axis:
        dx = np.gradient(x_sma, axis=-1)

        # TODO: different conv. encoders for these two types of features:
        x = np.concatenate([x_sma, dx], axis=-1)

        # Crop outliers:
        x = np.clip(x, -10, 10)
        return x[:, None, :]

    def get_order_sizes(self):
        """
        Estimates current order sizes for assets in trade, updates attribute.

        Returns:
            array-like of floats
        """
        s = self.norm_stat_tracker_2.get_state()
        self.current_order_sizes = np.asarray(
            self.spread_sizer.get_sizing(self.env.broker.get_value(), s.mean, s.variance),
            dtype=np.float64)
        return self.current_order_sizes

    def long_spread(self):
        """
        Opens or adds up long spread `virtual position`.
        """
        # Get current sizes:
        order_sizes = self.get_order_sizes()

        if self.spread_position_size >= 0:
            if not self.can_add_up(order_sizes[0], order_sizes[1]):
                self.order_failed += 1
                # self.log.warning(
                #     'Adding Long spread to existing {} hit margin, ignored'.format(self.spread_position_size)
                # )
                return

        elif self.spread_position_size == -1:
            # Currently in a single short -> just close to prevent imbalance:
            return self.close_spread()

        name1 = self.datas[0]._name
        name2 = self.datas[1]._name

        self.order = self.buy(data=name1, size=order_sizes[0])
        self.order = self.sell(data=name2, size=order_sizes[1])
        self.spread_position_size += 1
        # self.log.warning('long spread submitted, new pos. size: {}'.format(self.spread_position_size))

    def short_spread(self):
        order_sizes = self.get_order_sizes()

        if self.spread_position_size <= 0:
            if not self.can_add_up(order_sizes[0], order_sizes[1]):
                self.order_failed += 1
                # self.log.warning(
                #     'Adding Short spread to existing {} hit margin, ignored'.format(self.spread_position_size)
                # )
                return

        elif self.spread_position_size == 1:
            # Currently in single long:
            return self.close_spread()

        name1 = self.datas[0]._name
        name2 = self.datas[1]._name

        self.order = self.sell(data=name1, size=order_sizes[0])
        self.order = self.buy(data=name2, size=order_sizes[1])
        self.spread_position_size -= 1
        # self.log.warning('short spread submitted, new pos. size: {}'.format(self.spread_position_size))

    def close_spread(self):
        self.order = self.close(data=self.datas[0]._name)
        self.order = self.close(data=self.datas[1]._name)
        self.spread_position_size = 0
        # self.log.warning('close spread submitted, new pos. size: {}'.format(self.spread_position_size))

    def can_add_up(self, order_0_size=None, order_1_size=None):
        """
        Checks if there is enough cash left to open a synthetic spread position.

        Args:
            order_0_size:   float, order size for data0 asset or None
            order_1_size:   float, order size for data1 asset or None

        Returns:
            True if possible, False otherwise
        """
        if order_1_size is None or order_0_size is None:
            order_sizes = self.get_order_sizes()
            order_0_size = order_sizes[0]
            order_1_size = order_sizes[1]

        # Get full operation cost:
        # TODO: there can be two commission schemes
        op_cost = [
            self.env.broker.comminfo[None].getoperationcost(
                size=size, price=self.getdatabyname(name).high[0]) /
            self.env.broker.comminfo[None].get_leverage() +
            self.env.broker.comminfo[None].getcommission(
                size=size, price=self.getdatabyname(name).high[0])
            for size, name in zip([order_0_size, order_1_size],
                                  [self.datas[0]._name, self.datas[1]._name])
        ]
        # self.log.warning('op_cost+comm+reserve: {:.4f}'.format(np.asarray(op_cost).sum() + self.margin_reserve))
        # self.log.warning('order sizes: {:.4f}; {:.4f}'.format(order_0_size, order_1_size))
        # self.log.warning('leverage: {}'.format(self.env.broker.comminfo[None].get_leverage()))
        # self.log.warning(
        #     'commision: {:.4f} + {:.4f}'.format(
        #         self.env.broker.comminfo[None].getcommission(
        #             size=order_0_size,
        #             price=self.getdatabyname(self.datas[0]._name).high[0]
        #         ),
        #         self.env.broker.comminfo[None].getcommission(
        #             size=order_1_size,
        #             price=self.getdatabyname(self.datas[1]._name).high[0]
        #         ),
        #     )
        # )
        # self.log.warning('current_cash: {}'.format(self.env.broker.get_cash()))
        if np.asarray(op_cost).sum() + self.margin_reserve >= \
                self.env.broker.get_cash() * (1 - self.margin_reserve):
            # self.log.warning('add_up check failed')
            return False

        else:
            # self.log.warning('add_up check ok')
            return True

    def get_broker_pos_duration(self, **kwargs):
        """
        Position duration is measured w.r.t. virtual spread position, not broker account exposure
        """
        if self.spread_position_size == 0:
            self.current_pos_duration = 0
            # self.log.warning('zero position')

        else:
            self.current_pos_duration += 1
            # self.log.warning('position duration: {}'.format(self.current_pos_duration))

        return self.current_pos_duration

    def notify_order(self, order):
        """
        Shamelessly taken from backtrader tutorial.
        TODO: better multi data support
        """
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.broker_message = 'BUY executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'. \
                    format(order.executed.price,
                           order.executed.value,
                           order.executed.comm)
                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm

            else:  # Sell
                self.broker_message = 'SELL executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'. \
                    format(order.executed.price,
                           order.executed.value,
                           order.executed.comm)
            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.broker_message = 'ORDER FAILED with status: ' + str(
                order.getstatusname())

        # self.log.warning('BM: {}'.format(self.broker_message))
        self.order = None

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);
        Potential FI_1 is current normalized unrealized profit/loss.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.broker_stat['unrealized_pnl'])
        current_pos_duration = self.broker_stat['pos_duration'][-1]

        # We want to estimate the shaping term `f = gamma * fi_prime - fi` for the currently open position,
        # thus need to consider different cases given the skip_frame parameter:
        if current_pos_duration == 0:
            # Set potential term to zero if there is no open position:
            fi_1 = 0
            fi_1_prime = 0
            # Reset filter state:
            self.kf_state = [0, 0]
        else:
            fi_1 = self.last_pnl
            # fi_1_prime = np.average(unrealised_pnl[-1])
            self.kf_state = self.kf.filter_update(
                filtered_state_mean=self.kf_state[0],
                filtered_state_covariance=self.kf_state[1],
                observation=unrealised_pnl[-1],
            )
            fi_1_prime = np.squeeze(self.kf_state[0])

        # Potential term 1:
        f1 = self.p.gamma * fi_1_prime - fi_1
        self.last_pnl = fi_1_prime

        # Potential-based shaping function 2:
        # based on potential of averaged profit/loss for global unrealized pnl:
        total_pnl = np.asarray(self.broker_stat['total_unrealized_pnl'])
        delta_total_pnl = np.average(
            total_pnl[-self.p.skip_frame:]) - np.average(
                total_pnl[:-self.p.skip_frame])

        fi_2 = delta_total_pnl
        fi_2_prime = self.last_delta_total_pnl

        # Potential term 2:
        f2 = self.p.gamma * fi_2_prime - fi_2
        self.last_delta_total_pnl = delta_total_pnl

        # Potential term 3:
        # f3 = 1 + 0.5 * np.log(1 + current_pos_duration)
        f3 = 1.0

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(
            self.broker_stat['realized_pnl'])[-self.p.skip_frame:].sum()

        # Weights are subject to tune:
        self.reward = (0.1 * f1 * f3 + 1.0 *
                       realized_pnl) * self.p.reward_scale  #/ self.normalizer
        # self.reward = np.clip(self.reward, -self.p.reward_scale, self.p.reward_scale)

        self.reward = np.clip(self.reward, -1e3, 1e3)

        return self.reward

    def _next_discrete(self, action):
        """
        Manages spread virtual positions.

        Args:
            action:     dict, string encoding of btgym.spaces.ActionDictSpace

        """
        # Here we expect action dict to contain single key:
        single_action = action[self.action_key]

        if single_action == 'hold' or self.is_done_enabled:
            pass
        elif single_action == 'buy':
            self.long_spread()
            self.broker_message = 'new {}_LONG created; '.format(
                self.action_key) + self.broker_message
        elif single_action == 'sell':
            self.short_spread()
            self.broker_message = 'new {}_SHORT created; '.format(
                self.action_key) + self.broker_message
        elif single_action == 'close':
            self.close_spread()
            self.broker_message = 'new {}_CLOSE created; '.format(
                self.action_key) + self.broker_message
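The `get_reward` method above implements the potential-based shaping F(s, a, s') = gamma * Phi(s') - Phi(s) described in its docstring (Ng et al., 1999). Below is a self-contained numpy sketch of that shaping term, with a toy unrealized-PnL trace standing in for the potential (the names are illustrative, not taken from the strategy):

import numpy as np

def shaping_term(phi, phi_prime, gamma=1.0):
    # Potential-based shaping: F(s, a, s') = gamma * Phi(s') - Phi(s)
    return gamma * phi_prime - phi

# Toy unrealized-PnL trace acting as the potential Phi:
unrealized_pnl = np.array([0.0, 0.1, 0.15, 0.12])
shaped = [shaping_term(p, p_next, gamma=0.99)
          for p, p_next in zip(unrealized_pnl[:-1], unrealized_pnl[1:])]
# In the discounted return these terms telescope, so adding them to the reward
# cannot change the optimal policy (Ng et al., 1999).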
Example #3
class DevStrat_4_12(DevStrat_4_11):
    """
    4_11 + sma-features 8, 512;
    """
    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Hyperparameters for estimating signal features:
    features_parameters = [8, 16, 32, 64, 128, 256]
    num_features = len(features_parameters)

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    state_ext_scale = np.linspace(3e3, 1e3, num=num_features)

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, num_features),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
    )

    def set_datalines(self):
        self.data.features = [
            btind.SimpleMovingAverage(self.datas[0], period=period)
            for period in self.features_parameters
        ]

        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=(np.asarray(self.features_parameters).max() +
                    self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):

        x_sma = np.stack([
            feature.get(size=self.time_dim) for feature in self.data.features
        ],
                         axis=-1)
        # Gradient along features axis:
        dx = np.gradient(x_sma, axis=-1) * self.p.state_ext_scale

        # In [-1,1]:
        x = tanh(dx)
        return x[:, None, :]

    def get_internal_state(self):

        x_broker = np.concatenate([
            np.asarray(self.broker_stat['value'])[..., None],
            np.asarray(self.broker_stat['unrealized_pnl'])[..., None],
            np.asarray(self.broker_stat['realized_pnl'])[..., None],
            np.asarray(self.broker_stat['cash'])[..., None],
            np.asarray(self.broker_stat['exposure'])[..., None],
        ],
                                  axis=-1)
        x_broker = tanh(
            np.gradient(x_broker, axis=-1) * self.p.state_int_scale)

        return x_broker[:, None, :]

    def get_datetime_state(self):
        time = self.data.datetime.time()
        date = self.data.datetime.date()

        # Encode in [0, 1]:
        mn = date.month / 12
        wd = date.weekday() / 6
        d = date.day / 31
        h = time.hour / 24
        mm = time.minute / 60

        encoded_stamp = [mn, d, wd, h, mm]
        return np.asarray(encoded_stamp)[None, :]
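For reference, here is the `get_datetime_state` encoding above applied to a fixed timestamp; the result matches the (1, 5) 'datetime' Box declared in `state_shape` (the concrete date is illustrative):

import datetime
import numpy as np

dt = datetime.datetime(2018, 3, 14, 9, 30)
encoded = np.asarray([
    dt.month / 12,      # month
    dt.day / 31,        # day of month
    dt.weekday() / 6,   # weekday, 0 = Monday
    dt.hour / 24,       # hour of day
    dt.minute / 60,     # minute of hour
])[None, :]             # shape (1, 5)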
Example #4
class DevStrat_4_11(DevStrat_4_10):
    """
    4_10 + Another set of sma-features, grads for broker state
    """
    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    state_ext_scale = np.linspace(3e3, 1e3, num=5)

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, 5),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 6),
                       dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=1.0,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        metadata={},
    )

    def set_datalines(self):
        self.data.sma_16 = btind.SimpleMovingAverage(self.datas[0], period=16)
        self.data.sma_32 = btind.SimpleMovingAverage(self.datas[0], period=32)
        self.data.sma_64 = btind.SimpleMovingAverage(self.datas[0], period=64)
        self.data.sma_128 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=128)
        self.data.sma_256 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=256)

        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0],
                                                      period=(256 +
                                                              self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):

        x_sma = np.stack([
            np.frombuffer(self.data.sma_16.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_32.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_64.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_128.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_256.get(size=self.time_dim)),
        ],
                         axis=-1)
        # Gradient along features axis:
        dx = np.gradient(x_sma, axis=-1) * self.p.state_ext_scale

        x = tanh(dx)

        return x[:, None, :]

    def get_internal_state(self):

        x_broker = np.concatenate(
            [
                np.asarray(self.broker_stat['value'])[..., None],
                np.asarray(self.broker_stat['unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['realized_pnl'])[..., None],
                np.asarray(self.broker_stat['cash'])[..., None],
                np.asarray(self.broker_stat['exposure'])[..., None],
                np.asarray(self.broker_stat['pos_direction'])[..., None],

                # np.asarray(self.broker_stat['value'])[-self.p.skip_frame:, None],
                # np.asarray(self.broker_stat['unrealized_pnl'])[-self.p.skip_frame:, None],
                # np.asarray(self.broker_stat['realized_pnl'])[-self.p.skip_frame:, None],
                # np.asarray(self.broker_stat['cash'])[-self.p.skip_frame:, None],
                # np.asarray(self.broker_stat['exposure'])[-self.p.skip_frame:, None],
                # np.asarray(self.broker_stat['pos_direction'])[-self.p.skip_frame:, None],
            ],
            axis=-1)
        x_broker = tanh(
            np.gradient(x_broker, axis=-1) * self.p.state_int_scale)
        # return x_broker[:, None, :]
        return np.clip(x_broker[:, None, :], -2, 2)
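A minimal numpy sketch of the shape flow in `get_external_state` above, with random data standing in for the five backtrader SMA lines (values are illustrative; the strategy pulls them from `self.data.sma_*`):

import numpy as np

time_dim = 30
state_ext_scale = np.linspace(3e3, 1e3, num=5)

x_sma = np.random.randn(time_dim, 5) * 1e-4           # stand-in for the stacked SMA lines
dx = np.gradient(x_sma, axis=-1) * state_ext_scale    # differences across the feature axis
x = np.tanh(dx)                                       # squash into [-1, 1]
x = x[:, None, :]                                     # (time_dim, 1, 5): fake `Width` dim for 2d conv
assert x.shape == (30, 1, 5)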
Example #5
class DevStrat_4_8(DevStrat_4_7):
    """
    4_7 + Uses full average_period of inner stats for use with inner_conv_encoder.
    """
    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    # !..-> here it is also `broker state` time-embedding period
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay, but somehow undiscounted works better <- wtf?

    reward_scale = 1  # reward multiplicator

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1,
                       high=1,
                       shape=(time_dim, 1, 3),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=1.0,
        state_ext_scale=2e3,  # EURUSD
        state_int_scale=1.0,  # not used
        metadata={},
    )

    def get_internal_state(self):
        x_broker = np.concatenate(
            [
                np.asarray(self.broker_stat['value'])[..., None],
                np.asarray(self.broker_stat['unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['realized_pnl'])[..., None],
                np.asarray(self.broker_stat['cash'])[..., None],
                np.asarray(self.broker_stat['exposure'])[..., None],
                # np.asarray(self.sliding_stat['episode_step'])[..., None],
                # np.asarray(self.sliding_stat['reward'])[..., None],
                # np.asarray(self.sliding_stat['action'])[..., None],
                # norm_position_duration[...,None],
                # max_unrealized_pnl[..., None],
                # min_unrealized_pnl[..., None],
            ],
            axis=-1)
        return x_broker[:, None, :]
Example #6
class DevStrat_4_7(DevStrat_4_6):
    """
    4_6 + Sliding statistics avg_period disentangled from time embedding dim;
    Only one last step sliding stats are used for internal state;
    Reward weights: 1, 2, 10; reward scale factor added;
    """

    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay

    reward_scale = 1.0  # reward scaler

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1,
                       high=1,
                       shape=(time_dim, 1, 3),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2, high=2, shape=(1, 1, 5), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=1.0,
        state_ext_scale=2e3,  # EURUSD
        state_int_scale=1.0,  # not used
        metadata={})

    def __init__(self, **kwargs):
        super(DevStrat_4_7, self).__init__(**kwargs)

    def get_internal_state(self):
        x_broker = np.stack([
            self.broker_stat['value'][-1],
            self.broker_stat['unrealized_pnl'][-1],
            self.broker_stat['realized_pnl'][-1],
            self.broker_stat['cash'][-1],
            self.broker_stat['exposure'][-1],
        ])
        return x_broker[None, None, :]
Example #7
File: strategy_4.py  Project: kazi308/btgym
class DevStrat_4_8(DevStrat_4_7):
    """
    4_7 +:
    Uses full average_period of inner stats for use with inner_conv_encoder.
    """
    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    # !..-> here it is also `broker state` time-embedding period
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay, but somehow undiscounted works better <- wtf?!

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1, high=1, shape=(time_dim, 1, 3)),
            'internal':
            spaces.Box(low=-2, high=2, shape=(avg_period, 1, 5)),
            'metadata':
            DictSpace({
                'type': spaces.Box(shape=(), low=0, high=1),
                'trial_num': spaces.Box(shape=(), low=0, high=10**10),
                'sample_num': spaces.Box(shape=(), low=0, high=10**10),
                'first_row': spaces.Box(shape=(), low=0, high=10**10)
            })
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        gamma=gamma,
        metadata={},
    )

    def get_broker_state(self):
        x_broker = np.concatenate(
            [
                np.asarray(self.sliding_stat['broker_value'])[..., None],
                np.asarray(self.sliding_stat['unrealized_pnl'])[..., None],
                np.asarray(self.sliding_stat['realized_pnl'])[..., None],
                np.asarray(self.sliding_stat['broker_cash'])[..., None],
                np.asarray(self.sliding_stat['exposure'])[..., None],
                # np.asarray(self.sliding_stat['episode_step'])[..., None],
                # np.asarray(self.sliding_stat['reward'])[..., None],
                # np.asarray(self.sliding_stat['action'])[..., None],
                # norm_position_duration[...,None],
                # max_unrealized_pnl[..., None],
                # min_unrealized_pnl[..., None],
            ],
            axis=-1)
        return x_broker[:, None, :]

    def get_state(self):
        # Update inner state statistic and compose state:
        self.update_sliding_stat()

        self.state['external'] = self.get_market_state()
        self.state['internal'] = self.get_broker_state()

        return self.state
Example #8
File: strategy_4.py  Project: kazi308/btgym
class DevStrat_4_6(BTgymBaseStrategy):
    """
    Objectives:
        external state data feature search:
            time_embedded three-channeled vector:
                - `Open` channel is one time-step difference of Open price;
                - `High` and `Low` channels are differences
                  between current Open price and current High or Low prices respectively

        internal state data feature search:
            time_embedded concatenated vector of broker and portfolio statistics
            time_embedded vector of last actions received (one-hot)
            time_embedded vector of rewards

        reward shaping search:
           potential-based shaping functions


    Data:
        synthetic/real
    """

    # Time embedding period:
    time_dim = 30  # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = time_dim

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1, high=1, shape=(time_dim, 1, 3)),
            'internal':
            spaces.Box(low=-2, high=2, shape=(avg_period, 1, 5)),
            'action':
            spaces.Box(low=0, high=1, shape=(avg_period, 1, 1)),
            'reward':
            spaces.Box(low=-1, high=1, shape=(avg_period, 1, 1)),
            'metadata':
            DictSpace({
                'type': spaces.Box(shape=(), low=0, high=1),
                'trial_num': spaces.Box(shape=(), low=0, high=10**10),
                'sample_num': spaces.Box(shape=(), low=0, high=10**10),
                'first_row': spaces.Box(shape=(), low=0, high=10**10)
            })
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        metadata={})

    def __init__(self, **kwargs):
        """

        Args:
            **kwargs:   see BTgymBaseStrategy args.
        """
        super(DevStrat_4_6, self).__init__(**kwargs)

        self.log.debug('DEV_state_shape: {}'.format(self.p.state_shape))
        self.log.debug('DEV_skip_frame: {}'.format(self.p.skip_frame))
        self.log.debug('DEV_portfolio_actions: {}'.format(
            self.p.portfolio_actions))
        self.log.debug('DEV_drawdown_call: {}'.format(self.p.drawdown_call))
        self.log.debug('DEV_target_call: {}'.format(self.p.target_call))
        self.log.debug('DEV_dataset_stat:\n{}'.format(self.p.dataset_stat))
        self.log.debug('DEV_episode_stat:\n{}'.format(self.p.episode_stat))

        # Define data channels:
        self.channel_O = bt.Sum(self.data.open, -self.data.open(-1))
        self.channel_H = bt.Sum(self.data.high, -self.data.open)
        self.channel_L = bt.Sum(self.data.low, -self.data.open)

        # Episodic metadata:
        self.state['metadata'] = {
            'type': np.asarray(self.p.metadata['type']),
            'trial_num': np.asarray(self.p.metadata['trial_num']),
            'sample_num': np.asarray(self.p.metadata['sample_num']),
            'first_row': np.asarray(self.p.metadata['first_row'])
        }

    def get_market_state(self):
        T = 2e3  # EURUSD
        # T = 1e2 # EURUSD, Z-norm
        # T = 1 # BTCUSD

        x = np.stack([
            np.frombuffer(self.channel_O.get(size=self.time_dim)),
            np.frombuffer(self.channel_H.get(size=self.time_dim)),
            np.frombuffer(self.channel_L.get(size=self.time_dim)),
        ],
                     axis=-1)
        # Log-scale: NOT used. Seems to hurt performance.
        # x = log_transform(x)

        # Amplify and squash in [-1,1], seems to be best option as of 4.10.17:
        # T param is supposed to keep most of the signal in 'linear' part of tanh while squashing spikes.
        x_market = tanh(x * T)

        return x_market[:, None, :]

    def get_broker_state(self):
        x_broker = np.concatenate(
            [
                np.asarray(self.sliding_stat['unrealized_pnl'])[..., None],
                # max_unrealized_pnl[..., None],
                # min_unrealized_pnl[..., None],
                np.asarray(self.sliding_stat['realized_pnl'])[..., None],
                np.asarray(self.sliding_stat['broker_value'])[..., None],
                np.asarray(self.sliding_stat['broker_cash'])[..., None],
                np.asarray(self.sliding_stat['exposure'])[..., None],
                # norm_episode_duration, gamma=5)[...,None],
                # norm_position_duration, gamma=2)[...,None],
            ],
            axis=-1)
        return x_broker[:, None, :]

    def get_state(self):

        # Update inner state statistic and compose state:
        self.update_sliding_stat()

        self.state['external'] = self.get_market_state()
        self.state['internal'] = self.get_broker_state()
        self.state['action'] = np.asarray(self.sliding_stat['action'])[:, None,
                                                                       None]
        self.state['reward'] = np.asarray(self.sliding_stat['reward'])[:, None,
                                                                       None]

        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);

        - potential FI_1 is current normalized unrealized profit/loss;
        - potential FI_2 is current normalized broker value.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().
        #
        # TODO: window size for stats averaging? Currently it is time_dim - 1; could another value be better?
        # TODO: pass actual gamma as strategy param.

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.sliding_stat['unrealized_pnl'])
        f1 = 1.0 * np.average(unrealised_pnl[1:]) - np.average(
            unrealised_pnl[:-1])

        # Potential-based shaping function 2:
        # based on potential of averaged broker value, normalized wrt to max drawdown and target bounds.
        norm_broker_value = np.asarray(self.sliding_stat['broker_value'])
        f2 = 1.0 * np.average(norm_broker_value[1:]) - np.average(
            norm_broker_value[:-1])

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(self.sliding_stat['realized_pnl'])[-1]

        # Weights are subject to tune:
        self.reward = 1.0 * f1 + 1.0 * f2 + 10.0 * realized_pnl
        # TODO: ------ignore-----:
        # 'Close-at-the-end' shaping term:
        # - 1.0 * self.exp_scale(avg_norm_episode_duration, gamma=6) * abs_max_norm_exposure
        # 'Do-not-expose-for-too-long' shaping term:
        # - 1.0 * self.exp_scale(avg_norm_position_duration, gamma=3)

        self.reward = np.clip(self.reward, -1, 1)

        return self.reward
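A numpy-only illustration of the three data channels defined in `__init__` above (in the strategy they are built from backtrader lines via `bt.Sum`); toy OHLC arrays stand in for the data feed:

import numpy as np

open_, high, low = (np.random.rand(31) for _ in range(3))   # toy price series

channel_O = open_[1:] - open_[:-1]    # one-step difference of Open
channel_H = high[1:] - open_[1:]      # High minus current Open
channel_L = low[1:] - open_[1:]       # Low minus current Open

x = np.stack([channel_O, channel_H, channel_L], axis=-1)    # (time_dim, 3)
x_market = np.tanh(x * 2e3)[:, None, :]                     # amplify, squash, add fake width dim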
Example #9
class CasualConvStrategy_1(CasualConvStrategy_0):
    """
    CWT. again.
    """
    # Time embedding period:
    # time_dim = 512
    # NOTE: if you change this, also change the UNREAL policy's aux. pixel control task upsampling params
    # NOTE_2: should be power of 2 if using casual conv. state encoder
    time_dim = 128
    # time_dim = 32

    # Periods for estimating signal features,
    # note: here the number of feature channels is doubled because Hi/Low values are computed for each period specified:

    # features_parameters = [8, 32, 128, 512]
    # features_parameters = [2, 8, 32, 64, 128]
    # features_parameters = [8, 16, 32, 64, 128, 256]
    #
    # num_features = len(features_parameters)

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are collected over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period
    # NOTE_: should be power of 2 if using casual conv. state encoder:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    num_features = 16

    cwt_signal_scale = 3e3  # first gradient scaling [scalar]
    cwt_lower_bound = 3.0  # CWT scales
    cwt_upper_bound = 90.0

    state_ext_scale = np.linspace(1, 3, num=num_features)

    params = dict(
        # Note: fake `Width` dimension to stay in convention with 2d conv. dims:
        state_shape={
            'raw':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 4),
                       dtype=np.float32),
            # 'external': spaces.Box(low=-100, high=100, shape=(time_dim, 1, num_features), dtype=np.float32),
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, num_features, 1),
                       dtype=np.float32),
            # 'external_2': spaces.Box(low=-100, high=100, shape=(time_dim, 1, 4), dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            # 'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            # TODO: change inheritance!
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
        cwt_lower_bound=cwt_lower_bound,
        cwt_upper_bound=cwt_upper_bound,
        cwt_signal_scale=cwt_signal_scale,
    )

    def __init__(self, **kwargs):
        super(CasualConvStrategy_1, self).__init__(**kwargs)
        # self.num_channels = self.p.state_shape['external'].shape[-1]
        self.num_channels = self.num_features
        # Define CWT scales:
        self.cwt_width = np.linspace(self.p.cwt_lower_bound,
                                     self.p.cwt_upper_bound, self.num_channels)

    def set_datalines(self):
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=(np.asarray(self.features_parameters).max() +
                    self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):
        # Use Hi-Low median as signal:
        x = (np.frombuffer(self.data.high.get(size=self.time_dim)) +
             np.frombuffer(self.data.low.get(size=self.time_dim))) / 2

        # Differences along time dimension:
        d_x = np.gradient(x, axis=0) * self.p.cwt_signal_scale

        # Compute continuous wavelet transform using Ricker wavelet:
        cwt_x = signal.cwt(d_x, signal.ricker, self.cwt_width).T

        norm_x = cwt_x

        # Note: differences taken once again along channels axis,
        # apply weighted scaling to normalize channels
        # norm_x = np.gradient(cwt_x, axis=-1)
        # norm_x = zscore(norm_x, axis=0) * self.p.state_ext_scale
        # norm_x *= self.p.state_ext_scale

        out_x = tanh(norm_x)

        # out_x = np.clip(norm_x, -10, 10)

        # return out_x[:, None, :]
        return out_x[..., None]

    def get_external_2_state(self):
        x = np.stack([
            np.frombuffer(self.data.high.get(size=self.time_dim)),
            np.frombuffer(self.data.open.get(size=self.time_dim)),
            np.frombuffer(self.data.low.get(size=self.time_dim)),
            np.frombuffer(self.data.close.get(size=self.time_dim)),
        ],
                     axis=-1)
        # Differences along features dimension:
        d_x = np.gradient(x, axis=-1) * self.p.cwt_signal_scale

        # Compute continuous wavelet transform using Ricker wavelet:
        # cwt_x = signal.cwt(d_x, signal.ricker, self.cwt_width).T

        norm_x = d_x

        # Note: differences taken once again along channels axis,
        # apply weighted scaling to normalize channels
        # norm_x = np.gradient(cwt_x, axis=-1)
        # norm_x = zscore(norm_x, axis=0) * self.p.state_ext_scale
        # norm_x *= self.p.state_ext_scale

        out_x = tanh(norm_x)

        # out_x = np.clip(norm_x, -10, 10)

        return out_x[:, None, :]
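
The external-state pipeline above (Hi/Low median, first difference along time, Ricker-wavelet CWT, tanh squashing) can be exercised in isolation. Below is a minimal sketch of the same transform on a synthetic price series; the function name, the width values and the scale factor are illustrative stand-ins for `cwt_width` and `cwt_signal_scale`, not part of the original strategy.

import numpy as np
from scipy import signal


def cwt_features(high, low, widths, signal_scale=3e3):
    # Hi/Low median -> differences along time -> Ricker CWT -> tanh, mirroring get_external_state():
    x = (np.asarray(high) + np.asarray(low)) / 2
    d_x = np.gradient(x, axis=0) * signal_scale
    cwt_x = signal.cwt(d_x, signal.ricker, widths).T   # [time_dim, num_channels]
    return np.tanh(cwt_x)[..., None]                   # fake 'width' dim -> [time_dim, num_channels, 1]


# Toy check on a random-walk price series:
rng = np.random.default_rng(0)
close = 1.1 + np.cumsum(rng.normal(scale=1e-4, size=128))
state = cwt_features(close + 1e-4, close - 1e-4, widths=np.linspace(4.0, 100.0, 16))
print(state.shape)  # (128, 16, 1)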
Example #10
class CasualConvStrategy_0(CasualConvStrategy):
    """
    Causal convolutional encoder + `sliding candle` price data features instead of SMA.
    """
    # Time embedding period:
    # time_dim = 512  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params
    time_dim = 128
    # time_dim = 32

    # Periods for estimating signal features;
    # note: the number of feature channels is doubled because Hi/Low values are computed for each period specified:

    # features_parameters = [8, 32, 128, 512]
    # features_parameters = [2, 8, 32, 64, 128]
    features_parameters = [8, 16, 32, 64, 128, 256]

    num_features = len(features_parameters)

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are collected over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    state_ext_scale = np.linspace(2e3, 1e3, num=num_features)

    params = dict(
        # Note: fake `Width` dimension to stay in convention with 2d conv. dims:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, num_features * 2),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            # 'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            # TODO: change inheritance!
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
    )

    def set_datalines(self):
        features_low = [
            MinPool(self.data, period=period)
            for period in self.features_parameters
        ]
        features_high = [
            MaxPool(self.data, period=period)
            for period in self.features_parameters
        ]

        # If `scale` was scalar - make it vector:
        if len(np.asarray(self.p.state_ext_scale).shape) < 1:
            self.p.state_ext_scale = np.repeat(
                np.asarray(self.p.state_ext_scale), self.num_features)

        # Sort features by `period` for .get_external_state() to estimate
        # more or less sensible gradient; double-stretch scale vector accordingly:
        # TODO: maybe 2 separate conv. encoders for hi/low?
        self.data.features = []
        for f1, f2 in zip(features_low, features_high):
            self.data.features += [f1, f2]

        self.p.state_ext_scale = np.repeat(self.p.state_ext_scale, 2)

        # print('p.state_ext_scale: ', self.p.state_ext_scale, self.p.state_ext_scale.shape)

        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=(np.asarray(self.features_parameters).max() +
                    self.time_dim))
        self.data.dim_sma.plotinfo.plot = False
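
MinPool and MaxPool used in set_datalines() above are not stock backtrader indicators. Assuming they are rolling minimum/maximum over `period`, a minimal hypothetical sketch built on the built-in Lowest/Highest indicators could look like this:

import backtrader as bt
import backtrader.indicators as btind


class MinPool(bt.Indicator):
    # Rolling minimum over `period` (assumed MinPool behaviour):
    lines = ('minpool',)
    params = (('period', 1),)

    def __init__(self):
        self.lines.minpool = btind.Lowest(self.data, period=self.p.period)


class MaxPool(bt.Indicator):
    # Rolling maximum over `period` (assumed MaxPool behaviour):
    lines = ('maxpool',)
    params = (('period', 1),)

    def __init__(self):
        self.lines.maxpool = btind.Highest(self.data, period=self.p.period)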
Example #11
class CasualConvStrategy(DevStrat_4_12):
    # class CasualConvStrategy(GuidedStrategy_0_0):
    """
    Provides a stream of data for a causal convolutional encoder.
    """
    # Time embedding period:
    time_dim = 128  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Hyperparameters for estimating signal features:
    # features_parameters = [8, 32, 64]
    features_parameters = [8, 32, 128, 512]
    num_features = len(features_parameters)

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    state_ext_scale = np.linspace(4e3, 1e3, num=num_features)

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, num_features),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            # 'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),  # TODO: change inheritance!
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
    )

    def set_datalines(self):
        self.data.features = [
            btind.SimpleMovingAverage(self.datas[0], period=period)
            for period in self.features_parameters
        ]

        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=(np.asarray(self.features_parameters).max() +
                    self.time_dim))
        self.data.dim_sma.plotinfo.plot = False
Example #12
class GuidedStrategy_0_0(DevStrat_4_12):
    """
    Augments observation state with expert action predictions estimated by accessing the entire episode data (=cheating).
    """
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    state_ext_scale = np.linspace(3e3, 1e3, num=6)

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, 6),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            'expert':
            spaces.Box(low=0,
                       high=10,
                       shape=(len(portfolio_actions), ),
                       dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        gamma=gamma,
        reward_scale=1.0,
        metadata={},
        # Expert parameters:
        expert_config={
            'time_threshold': 5,   # minimum peak estimation radius, in environment steps
            'pips_threshold': 5,   # minimum peak estimation value, in quote points (pips)
            'pips_scale': 1e-4,    # value of a single quote point relative to the price value
            'kernel_size': 5,      # gaussian_over_action tails size, in environment steps
            'kernel_stddev': 1,    # gaussian_over_action standard deviation
        },
    )

    def __init__(self, **kwargs):
        super(GuidedStrategy_0_0, self).__init__(**kwargs)
        self.expert = Oracle(action_space=np.arange(
            len(self.p.portfolio_actions)),
                             **self.p.expert_config)
        # self.expert = Oracle2(action_space=np.arange(len(self.p.portfolio_actions)), **self.p.expert_config)
        self.expert_actions = None
        self.current_expert_action = None

    def nextstart(self):
        """
        Overrides the base method, augmenting it with expert action estimation before the actual episode starts.
        """
        # This value shows how many episode records we need to spend
        # to estimate the first environment observation:
        self.inner_embedding = self.data.close.buflen()
        self.log.info('Inner time embedding: {}'.format(self.inner_embedding))

        # Now that we know the exact maximum possible episode length,
        # we can extract relevant episode data and make expert predictions:
        data = self.datas[0].p.dataname.values[self.inner_embedding:, :]

        # Note: need to form a sort of environment 'custom candles' by taking min and max price values over every
        # skip_frame period; this is done inside the Oracle class;
        # TODO: shift actions forward to eliminate one-point prediction lag?
        # expert_actions is a matrix representing discrete distribution over actions probabilities
        # of size [max_env_steps, action_space_size]:
        self.expert_actions = self.expert.fit(
            episode_data=data, resampling_factor=self.p.skip_frame)

    def get_expert_state(self):
        self.current_expert_action = self.expert_actions[self.env_iteration]

        #print('Strat_iteration:', self.iteration)
        #print('Env_iteration:', self.env_iteration)

        return self.current_expert_action
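
For reference, the Oracle call pattern used by GuidedStrategy_0_0 can be exercised outside the strategy. A hedged sketch assuming `episode_df` is the pandas dataframe backing the datafeed and that the Oracle import path matches your btgym version:

import numpy as np
# from btgym.research.gps.oracle import Oracle  # import path is an assumption

expert = Oracle(
    action_space=np.arange(4),  # indices of ('hold', 'buy', 'sell', 'close')
    time_threshold=5,
    pips_threshold=5,
    pips_scale=1e-4,
    kernel_size=5,
    kernel_stddev=1,
)
# expert_actions: matrix of shape [max_env_steps, action_space_size],
# a discrete distribution over actions for every environment step:
expert_actions = expert.fit(episode_data=episode_df.values, resampling_factor=10)
greedy = expert_actions.argmax(axis=-1)  # per-step greedy expert action indices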
Example #13
class SSAStrategy_0(PairSpreadStrategy_0):
    """
    Based on BivariateTimeSeriesModel decomposition.
    """
    time_dim = 128
    avg_period = 16
    model_time_dim = 16
    portfolio_actions = ('hold', 'buy', 'sell', 'close')
    features_parameters = None
    num_features = 4

    params = dict(
        state_shape={
            'external': DictSpace(
                {
                    'ssa': spaces.Box(low=-100, high=100, shape=(time_dim, 1, num_features), dtype=np.float32),

                }
            ),
            'internal': DictSpace(
                {
                    'broker': spaces.Box(low=-100, high=100, shape=(avg_period, 1, 5), dtype=np.float32),
                    'model': spaces.Box(low=-100, high=100, shape=(model_time_dim, 1, 9), dtype=np.float32),
                }
            ),
            'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),  # not used
            'stat': spaces.Box(low=-1e6, high=1e6, shape=(3, 1), dtype=np.float32),  # for debug purposes only
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'trial_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'trial_type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'sample_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'first_row': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'timestamp': spaces.Box(
                        shape=(),
                        low=0,
                        high=np.finfo(np.float64).max,
                        dtype=np.float64
                    ),
                    'generator': DictSpace(  # ~ S-generator params.
                        {
                            'mu': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'l': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'sigma': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'x0': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            )
                        }
                    )
                }
            )
        },
        data_model_params=dict(
            alpha=.001,
            stat_alpha=.0001,
            filter_alpha=.05,
            max_length=time_dim * 2,
            analyzer_window=10,
            p_analyzer_grouping=[[0, 1], [1, 2], [2, 3], [3, None]],
            s_analyzer_grouping=[[0, 1], [1, 2], [2, 3], [3, None]]
        ),
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=1.0,              # fi_gamma, should match MDP gamma decay
        reward_scale=1,         # reward multiplicator
        norm_alpha=0.001,       # float in (0, 1], renormalisation tracking decay (for synth. spread)
        norm_alpha_2=0.01,      # float in (0, 1], tracking decay for original prices
        drawdown_call=10,       # finish episode when hitting drawdown threshold, in percent.
        dataset_stat=None,      # Summary descriptive statistics for entire dataset and
        episode_stat=None,      # current episode. Got updated by server.
        time_dim=time_dim,      # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        position_max_depth=1,
        order_size=1,  # legacy plug, to be removed <-- rework gen_6.__init__
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Bivariate model:
        self.data_model = BivariatePriceModel(**self.p.data_model_params)

        # Accumulators for 'model' observation mode:
        self.external_model_state = np.zeros([self.model_time_dim, 1, 9])

    def set_datalines(self):
        # Discard superclass dataline, use SpreadConstructor instead:
        self.data.spread = None

        # Override stat line:
        self.stat_asset = self.SpreadConstructor()

        # Spy on reward behaviour:
        self.reward_tracker = self.CumSumReward()

        initial_time_period = self.p.time_dim
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0], period=initial_time_period)
        self.data.dim_sma.plotinfo.plot = False

    def prenext(self):
        if self.pre_iteration + 2 > self.p.time_dim - self.avg_period:
            self.update_broker_stat()
            x_upd = np.stack([
                np.asarray(self.datas[0].get(size=1)),
                np.asarray(self.datas[1].get(size=1))
            ],
                             axis=0)
            _ = self.norm_stat_tracker_2.update(
                x_upd
            )  # doubles update_broker_stat() but helps faster stabilization
            self.data_model.update(x_upd)

        elif self.pre_iteration + 2 == self.p.time_dim - self.avg_period:
            # Initialize all trackers:
            x_init = np.stack([
                np.asarray(self.datas[0].get(size=self.data.close.buflen())),
                np.asarray(self.datas[1].get(size=self.data.close.buflen()))
            ],
                              axis=0)
            _ = self.norm_stat_tracker_2.reset(x_init)
            _ = self.norm_stat_tracker.reset(
                np.asarray(self.stat_asset.get(
                    size=self.data.close.buflen()))[None, :])
            # _ = self.norm_stat_tracker.reset(np.asarray(self.stat_asset.get(size=1))[None, :])
            self.data_model.reset(x_init)

        self.pre_iteration += 1

    def nextstart(self):
        self.inner_embedding = self.data.close.buflen()
        self.log.debug('Inner time embedding: {}'.format(self.inner_embedding))

        # self.log.warning(
        #     'Pos. max. depth: {}, leverage: {}, order sizes: {:.4f}, {:.4f}'.format(
        #         self.p.position_max_depth,
        #         self.p.leverage,
        #         size_0,
        #         size_1
        #     )
        # )

    def get_normalisation(self):
        """
        Estimates current normalisation constants, updates `normalisation_state` attr.

        Returns:
            instance of NormalisationState tuple
        """
        # Update synth. spread rolling normalizers:
        x_upd = np.stack([
            np.asarray(self.datas[0].get(size=1)),
            np.asarray(self.datas[1].get(size=1))
        ],
                         axis=0)
        _ = self.norm_stat_tracker_2.update(x_upd)

        # ...and use [normalised] spread rolling mean and variance to estimate NormalisationState
        # used to normalize all broker statistics and reward:
        spread_data = np.asarray(self.stat_asset.get(size=1))

        mean, var = self.norm_stat_tracker.update(spread_data[None, :])
        var = np.clip(var, 1e-8, None)

        # Use 99% N(stat_data_mean, stat_data_std) intervals as normalisation interval:
        intervals = stats.norm.interval(.99, mean, var**.5)
        self.normalisation_state = NormalisationState(
            mean=float(mean),
            variance=float(var),
            low_interval=intervals[0][0],
            up_interval=intervals[1][0])
        return self.normalisation_state

    def get_external_state(self):
        return dict(ssa=self.get_external_ssa_state(), )

    def get_internal_state(self):
        return dict(
            broker=self.get_internal_broker_state(),
            model=self.get_data_model_state(),
        )

    def get_external_ssa_state(self):
        """
        Spread SSA decomposition.
        """
        x_upd = np.stack([
            np.asarray(self.datas[0].get(size=self.p.skip_frame)),
            np.asarray(self.datas[1].get(size=self.p.skip_frame))
        ],
                         axis=0)
        # self.log.warning('x_upd: {}'.format(x_upd.shape))
        self.data_model.update(x_upd)

        x_ssa = self.data_model.s.transform(
            size=self.p.time_dim).T  #* self.normalizer

        # Gradient along features axis:
        # dx = np.gradient(x_ssa, axis=-1)
        #
        # # Add up: gradient  along time axis:
        # # dx2 = np.gradient(dx, axis=0)
        #
        # x = np.concatenate([x_ssa_bank, dx], axis=-1)

        # Crop outliers:
        x_ssa = np.clip(x_ssa, -10, 10)
        # x_ssa = np.clip(dx, -10, 10)
        return x_ssa[:, None, :]

    def get_data_model_state(self):
        """
         Spread stochastic model parameters.
        """
        state = self.data_model.s.process.get_state()
        cross_corr = cov2corr(state.filtered.covariance)[[0, 0, 1], [1, 2, 2]]
        update = np.concatenate([
            state.filtered.mean.flatten(),
            state.filtered.variance.flatten(),
            cross_corr,
        ])
        self.external_model_state = np.concatenate(
            [self.external_model_state[1:, :, :], update[None, None, :]],
            axis=0)
        # self.external_model_state = np.gradient(self.external_model_state, axis=-1)
        return self.external_model_state

    def get_internal_broker_state(self):
        stat_lines = ('value', 'unrealized_pnl', 'realized_pnl', 'cash',
                      'exposure')
        x_broker = np.stack(
            [np.asarray(self.broker_stat[name]) for name in stat_lines],
            axis=-1)
        # self.log.warning('broker: {}'.format(x_broker))
        # self.log.warning('Ns: {}'.format(self.normalisation_state))
        # x_broker = np.gradient(x_broker, axis=-1)
        return np.clip(x_broker[:, None, :], -100, 100)
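
`cov2corr` used in get_data_model_state() converts a covariance matrix into a correlation matrix (as in statsmodels.stats.moment_helpers). A minimal NumPy equivalent, shown only to make the cross-correlation extraction explicit:

import numpy as np


def cov2corr(cov):
    # Covariance matrix -> correlation matrix:
    std = np.sqrt(np.diag(cov))
    return cov / np.outer(std, std)


# For a 3x3 filtered covariance the strategy keeps the three off-diagonal
# correlations, i.e. cov2corr(cov)[[0, 0, 1], [1, 2, 2]]:
cov = np.array([[2.0, 0.3, 0.1],
                [0.3, 1.0, 0.2],
                [0.1, 0.2, 0.5]])
print(cov2corr(cov)[[0, 0, 1], [1, 2, 2]])  # pairwise correlations (0,1), (0,2), (1,2)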
Example #14
class PairSpreadStrategy_1(PairSpreadStrategy_0):
    """
    Expects pair of data streams. Encodes each asset independently.
    """

    # Time embedding period:
    time_dim = 128  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 64

    # Possible agent actions;  Note: place 'hold' first! :
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = (1, 4, 16, 64, 256, 1024)
    num_features = len(features_parameters)

    params = dict(
        state_shape={
            'external':
            DictSpace({
                'asset1':
                spaces.Box(low=-10,
                           high=10,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
                'asset2':
                spaces.Box(low=-10,
                           high=10,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
            }),
            'internal':
            spaces.Box(low=-100,
                       high=100,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'expert':
            spaces.Box(low=0,
                       high=10,
                       shape=(len(portfolio_actions), ),
                       dtype=np.float32),
            'stat':
            spaces.Box(low=-100, high=100, shape=(3, 1), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
                # TODO: make generator parameters names standard
                'generator':
                DictSpace({
                    'mu':
                    spaces.Box(shape=(),
                               low=np.finfo(np.float64).min,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'l':
                    spaces.Box(shape=(),
                               low=0,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'sigma':
                    spaces.Box(shape=(),
                               low=0,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64),
                    'x0':
                    spaces.Box(shape=(),
                               low=np.finfo(np.float64).min,
                               high=np.finfo(np.float64).max,
                               dtype=np.float64)
                })
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=1.0,  # fi_gamma, ~ should match MDP gamma decay
        reward_scale=1,  # reward multiplicator
        norm_alpha=0.001,  # renormalisation tracking decay, float in (0, 1]
        norm_alpha_2=0.01,  # tracking decay for original prices, float in (0, 1]
        drawdown_call=10,  # finish episode when hitting drawdown threshold, in percent
        dataset_stat=None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Got updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        position_max_depth=1,
        order_size=1,  # legacy plug, to be removed <-- rework gen_6.__init__
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def set_datalines(self):
        # Override stat line:
        self.stat_asset = self.data.spread = SpreadConstructor()

        # Spy on reward behaviour:
        self.reward_tracker = CumSumReward()

        self.data.std1 = btind.StdDev(self.datas[0],
                                      period=self.p.time_dim,
                                      safepow=True)
        self.data.std1.plotinfo.plot = False

        self.data.std2 = btind.StdDev(self.datas[1],
                                      period=self.p.time_dim,
                                      safepow=True)
        self.data.std2.plotinfo.plot = False

        self.data.features1 = [
            btind.EMA(self.datas[0], period=period)
            for period in self.p.features_parameters
        ]
        self.data.features2 = [
            btind.EMA(self.datas[1], period=period)
            for period in self.p.features_parameters
        ]

        initial_time_period = np.asarray(
            self.p.features_parameters).max() + self.p.time_dim
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0], period=initial_time_period)
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):
        """
        Attempt to include an averaged decomposition of the original normalised spread.
        """
        x_sma1 = np.stack([
            feature.get(size=self.p.time_dim)
            for feature in self.data.features1
        ],
                          axis=-1)
        scale = 1 / np.clip(self.data.std1[0], 1e-10, None)
        x_sma1 *= scale  # <-- more or less ok

        # Gradient along features axis:
        dx1 = np.gradient(x_sma1, axis=-1)
        dx1 = np.clip(dx1, -10, 10)

        x_sma2 = np.stack([
            feature.get(size=self.p.time_dim)
            for feature in self.data.features2
        ],
                          axis=-1)
        scale = 1 / np.clip(self.data.std2[0], 1e-10, None)
        x_sma2 *= scale  # <-- more or less ok

        # Gradient along features axis:
        dx2 = np.gradient(x_sma2, axis=-1)
        dx2 = np.clip(dx2, -10, 10)

        return {
            'asset1': dx1[:, None, :],
            'asset2': dx2[:, None, :],
        }
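
The per-asset encoding in get_external_state() above (EMA bank, division by a rolling std estimate, gradient across EMA periods, clipping) is easy to replicate on a plain array. A minimal sketch with hypothetical helper names; the simple EMA and the full-window std below are stand-ins for btind.EMA and btind.StdDev:

import numpy as np


def encode_asset(close, periods=(1, 4, 16, 64, 256, 1024), clip=10.0):
    # EMA bank -> scale by inverse std -> gradient along the feature axis, as in get_external_state():
    def ema(x, period):
        alpha = 2.0 / (period + 1.0)
        out = np.empty_like(x, dtype=np.float64)
        out[0] = x[0]
        for i in range(1, len(x)):
            out[i] = alpha * x[i] + (1.0 - alpha) * out[i - 1]
        return out

    x = np.asarray(close, dtype=np.float64)
    x_ema = np.stack([ema(x, p) for p in periods], axis=-1)
    scale = 1.0 / np.clip(x.std(), 1e-10, None)
    dx = np.gradient(x_ema * scale, axis=-1)
    return np.clip(dx, -clip, clip)[:, None, :]  # [time, 1, num_features]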
Example #15
class MonoSpreadOUStrategy_0(BaseStrategy5):
    """
    Expects spread as single generated data stream.
    """
    # Time embedding period:
    time_dim = 128  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 90

    # Possible agent actions;  Note: place 'hold' first! :
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = (1, 4, 16, 64, 256, 1024)
    num_features = len(features_parameters)

    params = dict(
        state_shape={
            'external': spaces.Box(low=-10, high=10, shape=(time_dim, 1, num_features*2), dtype=np.float32),
            'internal': spaces.Box(low=-2, high=2, shape=(avg_period, 1, 6), dtype=np.float32),
            'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'trial_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10**10,
                        dtype=np.uint32
                    ),
                    'trial_type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'sample_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10**10,
                        dtype=np.uint32
                    ),
                    'first_row': spaces.Box(
                        shape=(),
                        low=0,
                        high=10**10,
                        dtype=np.uint32
                    ),
                    'timestamp': spaces.Box(
                        shape=(),
                        low=0,
                        high=np.finfo(np.float64).max,
                        dtype=np.float64
                    ),
                    # TODO: make generator parameters names standard
                    'generator': DictSpace(
                        {
                            'mu': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'l': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'sigma': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'x0': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            )
                        }
                    )
                }
            )
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,             # fi_gamma, should match MDP gamma decay
        reward_scale=1,         # reward multiplicator
        drawdown_call=10,       # finish episode when hitting drawdown threshold, in percent.
        target_call=10,         # finish episode when reaching profit target, in percent.
        dataset_stat=None,      # Summary descriptive statistics for entire dataset and
        episode_stat=None,      # current episode. Got updated by server.
        time_dim=time_dim,      # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,       # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        super(MonoSpreadOUStrategy_0, self).__init__(**kwargs)
        self.data.high = self.data.low = self.data.close = self.data.open
        self.current_expert_action = np.zeros(len(self.p.portfolio_actions))
        self.state['metadata'] = self.metadata

        # Combined dataset related, infer OU generator params:
        generator_keys = self.p.state_shape['metadata'].spaces['generator'].spaces.keys()
        if 'generator' not in self.p.metadata.keys() or self.p.metadata['generator'] == {}:
            self.metadata['generator'] = {key: np.asarray(0) for key in generator_keys}

        else:
            # self.metadata['generator'] = {key: self.p.metadata['generator'][key] for key in generator_keys}

            # TODO: clean up this mess, refine names:

            self.metadata['generator'] = {
                'l': self.p.metadata['generator']['ou_lambda'],
                'mu': self.p.metadata['generator']['ou_mu'],
                'sigma': self.p.metadata['generator']['ou_sigma'],
                'x0': 0,
            }

            # Make scalars np arrays to comply with gym.spaces.Box specs:
            for k, v in self.metadata['generator'].items():
                self.metadata['generator'][k] = np.asarray(v)

        self.last_delta_total_pnl = 0
        self.last_pnl = 0

        self.log.debug('strategy got broadcast_msg: <<{}>>'.format(self.p.broadcast_message))

    def get_broadcast_message(self):
        return {
            'data_model_psi': np.zeros([2, 3]),
            'iteration': self.iteration
        }

    def set_datalines(self):
        self.data.high = self.data.low = self.data.close = self.data.open

        self.data.std = btind.StdDev(self.data.open, period=self.p.time_dim, safepow=True)
        self.data.std.plotinfo.plot = False

        self.data.features = [
            btind.SimpleMovingAverage(self.data.open, period=period) for period in self.p.features_parameters
        ]
        initial_time_period = np.asarray(self.p.features_parameters).max() + self.p.time_dim
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=initial_time_period
        )
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):
        x_sma = np.stack(
            [
                feature.get(size=self.p.time_dim) for feature in self.data.features
            ],
            axis=-1
        )
        scale = 1 / np.clip(self.data.std[0], 1e-10, None)
        x_sma *= scale  # <-- more or less ok

        # Gradient along features axis:
        dx = np.gradient(x_sma, axis=-1)

        # Add up: gradient  along time axis:
        dx2 = np.gradient(dx, axis=0)

        # TODO: different conv. encoders for these two types of features:
        x = np.concatenate([dx, dx2], axis=-1)

        # Crop outliers:
        x = np.clip(x, -10, 10)
        return x[:, None, :]

    def get_internal_state(self):

        x_broker = np.concatenate(
            [
                np.asarray(self.broker_stat['value'])[..., None],
                np.asarray(self.broker_stat['unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['total_unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['realized_pnl'])[..., None],
                np.asarray(self.broker_stat['cash'])[..., None],
                np.asarray(self.broker_stat['exposure'])[..., None],
            ],
            axis=-1
        )
        x_broker = tanh(np.gradient(x_broker, axis=-1) * self.p.state_int_scale)
        return x_broker[:, None, :]

    def get_expert_state(self):
        """
        Not used.
        """
        return np.zeros(len(self.p.portfolio_actions))

    def get_reward(self):
        """
        Shapes the reward function as normalized single-trade realized profit/loss,
        augmented with potential-based reward shaping functions of the form:
        F(s, a, s') = gamma * FI(s') - FI(s);
        Potential FI_1 is the current normalized unrealized profit/loss.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.broker_stat['unrealized_pnl'])
        current_pos_duration = self.broker_stat['pos_duration'][-1]

        # We want to estimate the shaping term `f = gamma * fi_prime - fi` for the currently opened position,
        # thus need to consider different cases given the skip_frame parameter:
        if current_pos_duration == 0:
            # Set potential term to zero if there is no opened positions:
            fi_1 = 0
            fi_1_prime = 0
        else:
            current_avg_period = min(self.avg_period, current_pos_duration)

            fi_1 = self.last_pnl
            fi_1_prime = np.average(unrealised_pnl[- current_avg_period:])

        # Potential term 1:
        f1 = self.p.gamma * fi_1_prime - fi_1
        self.last_pnl = fi_1_prime

        # Potential-based shaping function 2:
        # based on potential of averaged profit/loss for global unrealized pnl:
        total_pnl = np.asarray(self.broker_stat['total_unrealized_pnl'])
        delta_total_pnl = np.average(total_pnl[-self.p.skip_frame:]) - np.average(total_pnl[:-self.p.skip_frame])

        fi_2 = delta_total_pnl
        fi_2_prime = self.last_delta_total_pnl

        # Potential term 2:
        f2 = self.p.gamma * fi_2_prime - fi_2
        self.last_delta_total_pnl = delta_total_pnl

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(self.broker_stat['realized_pnl'])[-self.p.skip_frame:].sum()

        # Weights are subject to tune:
        self.reward = (10 * f1 + 0 * f2 + 10.0 * realized_pnl) * self.p.reward_scale
        self.reward = np.clip(self.reward, -self.p.reward_scale, self.p.reward_scale)

        return self.reward
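
The shaping terms in get_reward() follow F(s, a, s') = gamma * FI(s') - FI(s) from Ng et al. (1999). A tiny numeric sketch of potential term 1, with made-up profit/loss numbers, just to show the arithmetic:

import numpy as np

gamma = 0.99
unrealised_pnl = np.asarray([0.0, 0.01, 0.02, 0.03])  # hypothetical rolling window
last_pnl = 0.005                                      # FI(s): previously averaged unrealized p/l

fi_1 = last_pnl
fi_1_prime = unrealised_pnl[-2:].mean()               # FI(s'): average over the current position window
f1 = gamma * fi_1_prime - fi_1                        # potential-based shaping term
print(round(f1, 5))                                   # 0.01975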
Example #16
class CasualConvStrategyMulti(CasualConvStrategy_0):
    """
    CWT + multiple data streams.
    Beta: data stream names are hard-coded in the class.
    TODO: pass data stream names as params
    """
    # Time embedding period:
    # NOTE_2: should be a power of 2 if using a causal conv. state encoder
    time_dim = 128

    # Periods for estimating signal features;
    # note: the number of feature channels is doubled because Hi/Low values are computed for each period specified:

    # features_parameters = [8, 32, 128, 512]
    # features_parameters = [2, 8, 32, 64, 128]
    # features_parameters = [8, 16, 32, 64, 128, 256]
    #
    # num_features = len(features_parameters)

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are collected over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period
    # NOTE_: should be a power of 2 if using a causal conv. state encoder:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 0.99  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator

    num_features = 16  # TODO: 8? (was: 16)

    cwt_signal_scale = 3e3  # first gradient scaling [scalar]
    cwt_lower_bound = 4.0  # CWT scales  TODO: 8.? (was : 3.)
    cwt_upper_bound = 100.0

    state_ext_scale = {
        'USD': np.linspace(1, 2, num=num_features),
        'GBP': np.linspace(1, 2, num=num_features),
        'CHF': np.linspace(1, 2, num=num_features),
        'JPY': np.linspace(5e-3, 1e-2, num=num_features),
    }
    order_size = {
        'USD': 1000,
        'GBP': 1000,
        'CHF': 1000,
        'JPY': 1000,
    }

    params = dict(
        # Note: fake `Width` dimension to stay in convention with 2d conv. dims:
        state_shape={
            'raw':
            spaces.Box(low=-1000,
                       high=1000,
                       shape=(time_dim, 4),
                       dtype=np.float32),
            'external':
            DictSpace({
                'USD':
                spaces.Box(low=-1000,
                           high=1000,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
                'GBP':
                spaces.Box(low=-1000,
                           high=1000,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
                'CHF':
                spaces.Box(low=-1000,
                           high=1000,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
                'JPY':
                spaces.Box(low=-1000,
                           high=1000,
                           shape=(time_dim, 1, num_features),
                           dtype=np.float32),
            }),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'datetime':
            spaces.Box(low=0, high=1, shape=(1, 5), dtype=np.float32),
            # 'expert': DictSpace(
            #     {
            #         'USD': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            #         'GBP': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            #         'CHF': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            #         'JPY': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            #     }
            # ),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='EUR',
        asset_names={'USD', 'GBP', 'CHF', 'JPY'},
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        order_size=order_size,
        skip_frame=skip_frame,
        state_ext_scale=state_ext_scale,
        state_int_scale=1.0,
        gamma=gamma,
        # base_dataline='USD',
        reward_scale=1.0,
        metadata={},
        cwt_lower_bound=cwt_lower_bound,
        cwt_upper_bound=cwt_upper_bound,
        cwt_signal_scale=cwt_signal_scale,
    )

    def __init__(self, **kwargs):
        self.data_streams = {}
        super(CasualConvStrategyMulti, self).__init__(**kwargs)
        # self.num_channels = self.p.state_shape['external'].shape[-1]
        self.num_channels = self.num_features
        # Define CWT scales:
        self.cwt_width = np.linspace(self.p.cwt_lower_bound,
                                     self.p.cwt_upper_bound, self.num_channels)

        # print('p: ', dir(self.p))

    def nextstart(self):
        """
        Overrides the base method, augmenting it with expert action estimation before the actual episode starts.
        """
        # This value shows how many episode records we need to spend
        # to estimate the first environment observation:
        self.inner_embedding = self.data.close.buflen()
        self.log.info('Inner time embedding: {}'.format(self.inner_embedding))

        # Now that we know the exact maximum possible episode length,
        # we can extract relevant episode data and make expert predictions:
        # data = self.datas[0].p.dataname.as_matrix()[self.inner_embedding:, :]
        data = {
            d._name: d.p.dataname.values[self.inner_embedding:, :]
            for d in self.datas
        }

        # Note: need to form a sort of environment 'custom candles' by taking min and max price values over every
        # skip_frame period; this is done inside the Oracle class;
        # TODO: shift actions forward to eliminate one-point prediction lag?
        # expert_actions is a matrix representing discrete distribution over actions probabilities
        # of size [max_env_steps, action_space_size]:

        # self.expert_actions = {
        #     key: self.expert.fit(episode_data=line, resampling_factor=self.p.skip_frame)
        #     for key, line in data.items()
        # }

    # def get_expert_state(self):
    #     # self.current_expert_action = self.expert_actions[self.env_iteration]
    #     self.current_expert_action = {
    #         key: line[self.env_iteration] for key, line in self.expert_actions.items()
    #     }
    #
    #     return self.current_expert_action

    def set_datalines(self):
        self.data_streams = {stream._name: stream for stream in self.datas}
        # self.data = self.data_streams[self.p.base_dataline] # TODO: ??!!

        self.data.dim_sma = btind.SimpleMovingAverage(
            self.data,
            period=(np.asarray(self.features_parameters).max() +
                    self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):
        return {
            key: self.get_single_external_state(key)
            for key in self.data_streams.keys()
        }

    def get_single_external_state(self, key):
        # Use Hi-Low median as signal:
        x = (np.frombuffer(self.data_streams[key].high.get(size=self.time_dim))
             + np.frombuffer(
                 self.data_streams[key].low.get(size=self.time_dim))) / 2

        # Differences along time dimension:
        d_x = np.gradient(x, axis=0) * self.p.cwt_signal_scale

        # Compute continuous wavelet transform using Ricker wavelet:
        cwt_x = signal.cwt(d_x, signal.ricker, self.cwt_width).T

        norm_x = cwt_x

        # Note: differences taken once again along channels axis,
        # apply weighted scaling to normalize channels
        # norm_x = np.gradient(cwt_x, axis=-1)
        # norm_x = zscore(norm_x, axis=0) * self.p.state_ext_scale
        norm_x *= self.p.state_ext_scale[key]

        out_x = tanh(norm_x)

        # out_x = np.clip(norm_x, -10, 10)

        # return out_x[:, None, :]
        return out_x[:, None, :]
Example #17
class PairSpreadStrategyRS_1(PairSpreadStrategy_0):
    """
    Regime-switching mode for agent actions:
    actions are defined as 'stay in' one of three possible regimes:
        keep out (or close an open position),
        hold a long position,
        hold a short position.

    Supports adding up position size
    """
    # Time embedding period:
    time_dim = 128  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 30

    # Possible agent actions as three regimes:
    # keep out (or close open position), take long position, take short position:
    portfolio_actions = ('out', 'long', 'long_+', 'short', 'short_+')

    features_parameters = (1, 4, 16, 64, 256, 1024)
    num_features = len(features_parameters)

    params = dict(
        state_shape={
            'external': spaces.Box(low=-10, high=10, shape=(time_dim, 1, num_features * 2), dtype=np.float32),
            'internal': spaces.Box(low=-2, high=2, shape=(avg_period, 1, 6), dtype=np.float32),
            'expert': spaces.Box(low=0, high=10, shape=(len(portfolio_actions),), dtype=np.float32),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'trial_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'trial_type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'sample_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'first_row': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'timestamp': spaces.Box(
                        shape=(),
                        low=0,
                        high=np.finfo(np.float64).max,
                        dtype=np.float64
                    ),
                    # TODO: make generator parameters names standard
                    'generator': DictSpace(
                        {
                            'mu': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'l': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'sigma': spaces.Box(
                                shape=(),
                                low=0,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            ),
                            'x0': spaces.Box(
                                shape=(),
                                low=np.finfo(np.float64).min,
                                high=np.finfo(np.float64).max,
                                dtype=np.float64
                            )
                        }
                    )
                }
            )
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,  # fi_gamma, should match MDP gamma decay
        reward_scale=1,  # reward multiplicator
        drawdown_call=10,  # finish episode when hitting drawdown threshold, in percent.
        target_call=10,  # finish episode when reaching profit target, in percent.
        dataset_stat=None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Got updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        super(PairSpreadStrategyRS_1, self).__init__(**kwargs)
        self.position_type = 'out'

    def _next_discrete(self, action):
        """
        Manages spread virtual positions in 'regime' mode.

        Args:
            action:     dict, string encoding of btgym.spaces.ActionDictSpace

        """
        # Here we expect action dict to contain single key:
        single_action = action[self.action_key]

        if not self.is_done_enabled:  # episode termination flag, no orders allowed
            if self.position_type == 'short':
                # Already short:
                if single_action == 'short':
                    pass
                elif single_action == 'short_+':
                    self.short_spread()
                    self.broker_message = '{}_SHORT added up; '.format(self.action_key) + self.broker_message
                else:
                    # action in ['long', 'long_+', 'out']:
                    self.close_spread()
                    self.position_type = 'out'
                    self.broker_message = 'new {}_OUT created; '.format(self.action_key) + self.broker_message

            elif self.position_type == 'long':
                # Already long:
                if single_action == 'long':
                    pass
                elif single_action == 'long_+':
                    self.long_spread()
                    self.broker_message = '{}_LONG added up; '.format(self.action_key) + self.broker_message
                else:
                    # action in ['short', 'short_+', 'out']:
                    self.close_spread()
                    self.position_type = 'out'
                    self.broker_message = 'new {}_OUT created; '.format(self.action_key) + self.broker_message

            else:
                # Neutral:
                if single_action == 'long':
                    self.long_spread()
                    self.position_type = 'long'
                    self.broker_message = '{}_LONG created; '.format(self.action_key) + self.broker_message
                elif single_action == 'short':
                    self.short_spread()
                    self.position_type = 'short'
                    self.broker_message = 'new {}_SHORT created; '.format(self.action_key) + self.broker_message
                elif single_action == 'out':
                    pass
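The `_next_discrete` handler above is a small three-state machine over `self.position_type`. Below is a minimal stand-alone sketch of the same transition logic; `long_spread`, `short_spread` and `close_spread` are just labels standing in for the strategy methods called above:

def next_position(position, action):
    """Return (new_position, operation) for one 'regime'-mode action."""
    if position == 'short':
        if action == 'short':
            return 'short', None                 # keep position as-is
        if action == 'short_+':
            return 'short', 'short_spread'       # add to existing short
        return 'out', 'close_spread'             # 'long', 'long_+', 'out' all flatten
    if position == 'long':
        if action == 'long':
            return 'long', None
        if action == 'long_+':
            return 'long', 'long_spread'         # add to existing long
        return 'out', 'close_spread'
    # flat ('out'): only outright 'long'/'short' open a position;
    # 'long_+', 'short_+' and 'out' are no-ops here:
    if action == 'long':
        return 'long', 'long_spread'
    if action == 'short':
        return 'short', 'short_spread'
    return 'out', None

assert next_position('out', 'long') == ('long', 'long_spread')
assert next_position('long', 'short') == ('out', 'close_spread')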
Example #18
    def __init__(self, engine, dataset=None, **kwargs):
        """
        This class requires dataset, strategy, engine instances to be passed explicitly.

        Args:
            dataset(btgym.datafeed):                        BTgymDataDomain instance;
            engine(bt.Cerebro):                             environment simulation engine, any bt.Cerebro subclass,

        Keyword Args:
            network_address=`tcp://127.0.0.1:` (str):       BTGym_server address.
            port=5500 (int):                                network port to use for server - API_shell communication.
            data_master=True (bool):                        let this environment control over data_server;
            data_network_address=`tcp://127.0.0.1:` (str):  data_server address.
            data_port=4999 (int):                           network port to use for server -- data_server communication.
            connect_timeout=20 (int):                       server connection timeout in seconds.
            render_enabled=True (bool):                     enable rendering for this environment;
            render_modes=['human', 'episode'] (list):       `episode` - plotted episode results;
                                                            `human` - raw_state observation.
            **render_args (any):                            any render-related args, passed through to renderer class.
            verbose=0 (int):                                verbosity mode, {0 - WARNING, 1 - INFO, 2 - DEBUG}
            log_level=None (int):                           logbook level {DEBUG=10, INFO=11, NOTICE=12, WARNING=13},
                                                            overrides `verbose` arg;
            log=None (logbook.Logger):                      external logbook logger,
                                                            overrides `log_level` and `verbose` args.
            task=0 (int):                                   environment id


        """
        self.dataset = dataset
        self.engine = engine
        # Parameters and default values:
        self.params = dict(
            engine={},
            dataset={},
            strategy={},
            render={},
        )
        # Update self attributes, remove used kwargs:
        for key in dir(self):
            if key in kwargs.keys():
                setattr(self, key, kwargs.pop(key))

        self.metadata = {'render.modes': self.render_modes}

        # Logging and verbosity control:
        if self.log is None:
            StreamHandler(sys.stdout).push_application()
            if self.log_level is None:
                log_levels = [(0, NOTICE), (1, INFO), (2, DEBUG)]
                self.log_level = WARNING
                for key, value in log_levels:
                    if key == self.verbose:
                        self.log_level = value
            self.log = Logger('BTgymMultiDataShell_{}'.format(self.task), level=self.log_level)

        # Network parameters:
        self.network_address += str(self.port)
        self.data_network_address += str(self.data_port)

        # Set server rendering:
        if self.render_enabled:
            self.renderer = BTgymRendering(self.metadata['render.modes'], log_level=self.log_level, **kwargs)

        else:
            self.renderer = BTgymNullRendering()
            self.log.info('Rendering disabled. Call to render() will return null-plug image.')

        # Append logging:
        self.renderer.log = self.log

        # Update params -1: pull from renderer, remove used kwargs:
        self.params['render'].update(self.renderer.params)
        for key in self.params['render'].keys():
            if key in kwargs.keys():
                _ = kwargs.pop(key)

        if self.data_master:
            try:
                assert self.dataset is not None

            except AssertionError:
                msg = 'Dataset instance should be provided for data_master environment.'
                self.log.error(msg)
                raise ValueError(msg)

            # Append logging:
            self.dataset.set_logger(self.log_level, self.task)

            # Update params -2: pull from dataset, remove used kwargs:
            self.params['dataset'].update(self.dataset.params)
            for key in self.params['dataset'].keys():
                if key in kwargs.keys():
                    _ = kwargs.pop(key)

        # Connect/Start data server (and get dataset statistic):
        self.log.info('Connecting data_server...')
        self._start_data_server()
        self.log.info('...done.')
        # After starting data-server we have self.assets attribute, dataset statistic etc. filled.

        # Define observation space shape, minimum / maximum values and agent action space.
        # Retrieve values from configured engine or...

        # ...Update params -4:
        # Pull strategy defaults to environment params dict :
        for t_key, t_value in self.engine.strats[0][0][0].params._gettuple():
            self.params['strategy'][t_key] = t_value

        # Update it with values from strategy 'passed-to params':
        for key, value in self.engine.strats[0][0][2].items():
            self.params['strategy'][key] = value

        self.asset_names = self.params['strategy']['asset_names']
        self.server_actions = {name: self.params['strategy']['portfolio_actions'] for name in self.asset_names}
        self.cash_name = self.params['strategy']['cash_name']

        self.params['strategy']['initial_action'] = self.get_initial_action()
        self.params['strategy']['initial_portfolio_action'] = self.get_initial_portfolio_action()

        try:
            assert set(self.asset_names).issubset(set(self.data_lines_names))

        except AssertionError:
            msg = 'Assets names should be subset of data_lines names, but got: assets: {}, data_lines: {}'.format(
                set(self.asset_names), set(self.data_lines_names)
            )
            self.log.error(msg)
            raise ValueError(msg)

        # ... Push it all back (don't ask):
        for key, value in self.params['strategy'].items():
            self.engine.strats[0][0][2][key] = value

        # For 'raw_state' min/max values,
        # the only way is to infer from raw Dataset price values (we already got those from data_server):
        if 'raw_state' in self.params['strategy']['state_shape'].keys():
            # Exclude 'volume' from columns we count:
            self.dataset_columns.remove('volume')

            # print(self.params['strategy'])
            # print('self.engine.strats[0][0][2]:', self.engine.strats[0][0][2])
            # print('self.engine.strats[0][0][0].params:', self.engine.strats[0][0][0].params._gettuple())

            # Override with absolute price min and max values:
            self.params['strategy']['state_shape']['raw_state'].low = \
                self.engine.strats[0][0][2]['state_shape']['raw_state'].low = \
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) + \
                self.dataset_stat.loc['min', self.dataset_columns].min()

            self.params['strategy']['state_shape']['raw_state'].high = \
                self.engine.strats[0][0][2]['state_shape']['raw_state'].high = \
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) + \
                self.dataset_stat.loc['max', self.dataset_columns].max()

            self.log.info('Inferring `raw_state` high/low values from dataset: {:.6f} / {:.6f}.'.
                          format(self.dataset_stat.loc['min', self.dataset_columns].min(),
                                 self.dataset_stat.loc['max', self.dataset_columns].max()))

        # Set observation space shape from engine/strategy parameters:
        self.observation_space = DictSpace(self.params['strategy']['state_shape'])

        self.log.debug('Obs. shape: {}'.format(self.observation_space.spaces))

        # Set action space and corresponding server messages:
        self.action_space = ActionDictSpace(
            base_actions=self.params['strategy']['portfolio_actions'],
            assets=self.asset_names
        )

        self.log.debug('Act. space shape: {}'.format(self.action_space.spaces))

        # Finally:
        self.server_response = None
        self.env_response = None

        # if not self.data_master:
        self._start_server()
        self.closed = False

        self.log.info('Environment is ready.')
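The parameter plumbing above follows a fixed precedence. A minimal sketch of that order with plain dicts (illustrative values only; the real sources are the strategy class params tuple and the kwargs stored in engine.strats[0][0][2]):

# 1. strategy class-level defaults (engine.strats[0][0][0].params._gettuple()):
strategy_defaults = {'time_dim': 128, 'avg_period': 64, 'leverage': 1.0}

# 2. values explicitly passed when the strategy was added (engine.strats[0][0][2]):
passed_params = {'avg_period': 32}

# Environment-side view: defaults first, explicit values override them:
env_strategy_params = dict(strategy_defaults)
env_strategy_params.update(passed_params)

# 3. the merged dict is pushed back so the server-side strategy instance
#    is created with exactly the values the environment advertises:
passed_params.update(env_strategy_params)

assert passed_params == {'time_dim': 128, 'avg_period': 32, 'leverage': 1.0}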
Example #19
File: strategy_4.py Project: kazi308/btgym
class DevStrat_4_7(DevStrat_4_6):
    """
    _4_6 +:
    Sliding statistics avg_period is disentangled from the time embedding dim;
    only last-step sliding statistics are used for the internal state.
    """

    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should match MDP gamma decay, though undiscounted somehow works better here.

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1, high=1, shape=(time_dim, 1, 3)),
            'internal':
            spaces.Box(low=-2, high=2, shape=(1, 1, 5)),
            #'raw_state': spaces.Box(low=-10, high=10, shape=(time_dim, 4)),
            #'action': spaces.Box(low=0, high=1, shape=(avg_period, 1, 1)),
            #'reward': spaces.Box(low=-1, high=1, shape=(avg_period, 1, 1)),
            'metadata':
            DictSpace({
                'type': spaces.Box(shape=(), low=0, high=1),
                'trial_num': spaces.Box(shape=(), low=0, high=10**10),
                'sample_num': spaces.Box(shape=(), low=0, high=10**10),
                'first_row': spaces.Box(shape=(), low=0, high=10**10)
            })
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        gamma=gamma,
        metadata={})

    def __init__(self, **kwargs):
        super(DevStrat_4_7, self).__init__(**kwargs)

    def get_broker_state(self):
        x_broker = np.stack([
            self.sliding_stat['broker_value'][-1],
            self.sliding_stat['unrealized_pnl'][-1],
            self.sliding_stat['realized_pnl'][-1],
            self.sliding_stat['broker_cash'][-1],
            self.sliding_stat['exposure'][-1],
            # self.sliding_stat['episode_step'][-1],
            # self.sliding_stat['reward'][-1],
            # self.sliding_stat['action'][-1],
            # norm_position_duration[-1],
        ])
        return x_broker[None, None, :]

    def get_state(self):
        # Update inner state statistic and compose state:
        self.update_sliding_stat()

        self.state['external'] = self.get_market_state()
        self.state['internal'] = self.get_broker_state()

        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);

        - potential FI_1 is the current normalized unrealized profit/loss;
        - potential FI_2 is the current normalized broker value;
        - potential FI_3 penalizes exposure toward the end of an episode.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().
        debug = {}

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.sliding_stat['unrealized_pnl'])
        f1 = self.p.gamma * np.average(unrealised_pnl[1:]) - np.average(
            unrealised_pnl[:-1])
        #f1 = self.p.gamma * discounted_average(unrealised_pnl[1:], self.p.gamma)\
        #     - discounted_average(unrealised_pnl[:-1], self.p.gamma)

        debug['f1'] = f1

        # Potential-based shaping function 2:
        # based on potential of averaged broker value, normalized wrt max drawdown and target bounds.
        norm_broker_value = np.asarray(self.sliding_stat['broker_value'])
        f2 = self.p.gamma * np.average(norm_broker_value[1:]) - np.average(
            norm_broker_value[:-1])
        #f2 = self.p.gamma * discounted_average(norm_broker_value[1:], self.p.gamma)\
        #     - discounted_average(norm_broker_value[:-1], self.p.gamma)

        debug['f2'] = f2

        # Potential-based shaping function 3:
        # negative potential of abs. size of position, exponentially weighted wrt. episode steps
        abs_exposure = np.abs(np.asarray(self.sliding_stat['exposure']))
        time = np.asarray(self.sliding_stat['episode_step'])
        #time_w = exp_scale(np.average(time[:-1]), gamma=5)
        #time_w_prime = exp_scale(np.average(time[1:]), gamma=5)
        #f3 = - 1.0 * time_w_prime * np.average(abs_exposure[1:]) #+ time_w * np.average(abs_exposure[:-1])
        f3 = - self.p.gamma * exp_scale(time[-1], gamma=3) * abs_exposure[-1] + \
             exp_scale(time[-2], gamma=3) * abs_exposure[-2]
        debug['f3'] = f3

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(self.sliding_stat['realized_pnl'])[-1]
        debug['f_real_pnl'] = 10 * realized_pnl

        # Weights are subject to tune:
        self.reward = 1.0 * f1 + 1.0 * f2 + 0.0 * f3 + 10.0 * realized_pnl

        debug['r'] = self.reward
        debug['b_v'] = self.sliding_stat['broker_value'][-1]
        debug['unreal_pnl'] = self.sliding_stat['unrealized_pnl'][-1]
        debug['iteration'] = self.iteration

        #for k, v in debug.items():
        #    print('{}: {}'.format(k, v))
        #print('\n')

        # TODO: ------ignore-----:
        # 'Do-not-expose-for-too-long' shaping term:
        # - 1.0 * self.exp_scale(avg_norm_position_duration, gamma=3)

        self.reward = np.clip(self.reward, -1, 1)

        return self.reward
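Each potential-based shaping term in get_reward() above has the form F(s, s') = gamma * FI(s') - FI(s). A minimal numeric sketch with made-up window values:

import numpy as np

gamma = 0.99
# sliding window of normalized unrealized P/L values (illustrative numbers only):
unrealized_pnl = np.array([0.00, 0.01, 0.03, 0.02, 0.05])

# F(s, s') = gamma * FI(s') - FI(s), with FI taken as the window average:
f1 = gamma * np.average(unrealized_pnl[1:]) - np.average(unrealized_pnl[:-1])

# a single realized-P/L sample and the final clipped reward (weights as above):
realized_pnl = 0.0
reward = float(np.clip(1.0 * f1 + 10.0 * realized_pnl, -1, 1))
print(f1, reward)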
Example #20
class BTgymEnv(gym.Env):
    """
    Base OpenAI Gym API shell for Backtrader backtesting/trading library.
    """
    # Datafeed Server management:
    data_master = True
    data_network_address = 'tcp://127.0.0.1:'  # using localhost.
    data_port = 4999
    data_server = None
    data_server_pid = None
    data_context = None
    data_socket = None
    data_server_response = None

    # Dataset:
    dataset = None  # BTgymDataset instance.
    dataset_stat = None

    # Backtrader engine:
    engine = None  # bt.Cerebro subclass for server to execute.

    # Strategy:
    strategy = None  # strategy to use if no <engine> class has been passed.

    # Server and network:
    server = None  # Server process.
    context = None  # ZMQ context.
    socket = None  # ZMQ socket, client side.
    port = 5500  # network port to use.
    network_address = 'tcp://127.0.0.1:'  # using localhost.
    ctrl_actions = ('_done', '_reset', '_stop', '_getstat', '_render')  # server control messages.
    server_response = None

    # Connection timeout:
    connect_timeout = 220  # server connection timeout in seconds.
    #connect_timeout_step = 0.01  # time between retries in seconds.

    # Rendering:
    render_enabled = True
    render_modes = [
        'human',
        'episode',
    ]
    # `episode` - plotted episode results.
    # `human` - raw_state observation in conventional human-readable format.
    #  <obs_space_key> - rendering of arbitrary state presented in observation_space with same key.

    renderer = None  # Rendering support.
    rendered_rgb = dict()  # Keep last rendered images for each mode.

    # Logging and id:
    log = None
    log_level = None  # logbook level: NOTICE, WARNING, INFO, DEBUG etc. or its integer equivalent;
    verbose = 0  # verbosity mode, valid only if no `log_level` arg has been provided:
    # 0 - WARNING, 1 - INFO, 2 - DEBUG.
    task = 0
    asset_names = ('default_asset', )
    data_lines_names = ('default_asset', )
    cash_name = 'default_cash'

    closed = True

    def __init__(self, **kwargs):
        """

        Keyword Args:
            filename=None (str, list):                      csv data file.
            **datafeed_args (any):                          any datafeed-related args, passed through to
                                                            default btgym.datafeed class.
            dataset=None (btgym.datafeed):                  BTgymDataDomain instance,
                                                            overrides `filename` or any other datafeed-related args.
            strategy=None (btgym.strategy):                 strategy to be used by `engine`, any subclass of
                                                            btgym.strategy.base.BTgymBaseStrategy
            engine=None (bt.Cerebro):                       environment simulation engine, any bt.Cerebro subclass,
                                                            overrides `strategy` arg.
            network_address=`tcp://127.0.0.1:` (str):       BTGym_server address.
            port=5500 (int):                                network port to use for server - API_shell communication.
            data_master=True (bool):                        let this environment control over data_server;
            data_network_address=`tcp://127.0.0.1:` (str):  data_server address.
            data_port=4999 (int):                           network port to use for server -- data_server communication.
            connect_timeout=20 (int):                       server connection timeout in seconds.
            render_enabled=True (bool):                     enable rendering for this environment;
            render_modes=['human', 'episode'] (list):       `episode` - plotted episode results;
                                                            `human` - raw_state observation.
            **render_args (any):                            any render-related args, passed through to renderer class.
            verbose=0 (int):                                verbosity mode, {0 - WARNING, 1 - INFO, 2 - DEBUG}
            log_level=None (int):                           logbook level {DEBUG=10, INFO=11, NOTICE=12, WARNING=13},
                                                            overrides `verbose` arg;
            log=None (logbook.Logger):                      external logbook logger,
                                                            overrides `log_level` and `verbose` args.
            task=0 (int):                                   environment id

        Environment kwargs applying logic::

            if <engine> kwarg is given:
                do not use default engine and strategy parameters;
                ignore <strategy> kwarg and all strategy and engine-related kwargs.

            else (no <engine>):
                use default engine parameters;
                if any engine-related kwarg is given:
                    override corresponding default parameter;

                if <strategy> is given:
                    do not use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

                else (no <strategy>):
                    use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

            if <dataset> kwarg is given:
                do not use default dataset parameters;
                ignore dataset related kwargs;

            else (no <dataset>):
                use default dataset parameters;
                    if  any dataset related kwarg is given:
                        override corresponding dataset parameter;

            If any <other> kwarg is given:
                override corresponding default parameter.
        """
        # Parameters and default values:
        self.params = dict(

            # Backtrader engine mandatory parameters:
            engine=dict(
                start_cash=100.0,  # initial trading capital.
                broker_commission=0.001,  # trade execution commission, default is 0.1% of operation value.
                fixed_stake=10,  # single trade stake is fixed type by def.
            ),
            # Dataset mandatory parameters:
            dataset=dict(filename=None, ),
            strategy=dict(state_shape=dict(), ),
            render=dict(),
        )
        p2 = dict(  # IS HERE FOR REFERENCE ONLY
            # Strategy related parameters:
            # Observation state shape is dictionary of Gym spaces,
            # at least should contain `raw_state` field.
            # By convention first dimension of every Gym Box space is time embedding one;
            # one can define any shape; should match env.observation_space.shape.
            # observation space state min/max values,
            # For `raw_state' - absolute min/max values from BTgymDataset will be used.
            state_shape=dict(raw=spaces.Box(
                shape=(10, 4), low=-100, high=100, dtype=np.float32)),
            drawdown_call=None,  # episode maximum drawdown threshold, default is 90% of initial value.
            portfolio_actions=None,
            # agent actions,
            # should consist with BTgymStrategy order execution logic;
            # defaults are: 0 - 'do nothing', 1 - 'buy', 2 - 'sell', 3 - 'close position'.
            skip_frame=None,
            # Number of environment steps to skip before returning next response,
            # e.g. if set to 10 -- agent will interact with environment every 10th episode step;
            # Every other step agent's action is assumed to be 'hold'.
            # Note: INFO part of environment response is a list of all skipped frame's info's,
            #       i.e. [info[-9], info[-8], ..., info[0]].
        )
        # Update self attributes, remove used kwargs:
        for key in dir(self):
            if key in kwargs.keys():
                setattr(self, key, kwargs.pop(key))

        self.metadata = {'render.modes': self.render_modes}

        # Logging and verbosity control:
        if self.log is None:
            StreamHandler(sys.stdout).push_application()
            if self.log_level is None:
                log_levels = [(0, NOTICE), (1, INFO), (2, DEBUG)]
                self.log_level = WARNING
                for key, value in log_levels:
                    if key == self.verbose:
                        self.log_level = value
            self.log = Logger('BTgymAPIshell_{}'.format(self.task),
                              level=self.log_level)

        # Network parameters:
        self.network_address += str(self.port)
        self.data_network_address += str(self.data_port)

        # Set server rendering:
        if self.render_enabled:
            self.renderer = BTgymRendering(self.metadata['render.modes'],
                                           log_level=self.log_level,
                                           **kwargs)

        else:
            self.renderer = BTgymNullRendering()
            self.log.info(
                'Rendering disabled. Call to render() will return null-plug image.'
            )

        # Append logging:
        self.renderer.log = self.log

        # Update params -1: pull from renderer, remove used kwargs:
        self.params['render'].update(self.renderer.params)
        for key in self.params['render'].keys():
            if key in kwargs.keys():
                _ = kwargs.pop(key)

        # Disable multiple data streams (multi-asset) [for data-master]:
        try:
            assert not isinstance(self.dataset, BTgymMultiData)

        except AssertionError:
            self.log.error(
                'Using multiple data streams with base BTgymEnv class is not supported. Use designated class.'
            )
            raise ValueError

        if self.data_master:
            # DATASET preparation, only data_master executes this:
            #
            if self.dataset is not None:
                # If BTgymDataset instance has been passed:
                # do nothing.
                msg = 'Custom Dataset class used.'

            else:
                # If no BTgymDataset has been passed,
                # Make default dataset with given CSV file:
                if not os.path.isfile(str(self.params['dataset']['filename'])):
                    raise FileNotFoundError(
                        'Dataset source data file not specified/not found')

                # Use kwargs to instantiate dataset:
                self.dataset = BTgymDataset(**kwargs)
                msg = 'Base Dataset class used.'

            # Append logging:
            self.dataset.set_logger(self.log_level, self.task)

            # Update params -2: pull from dataset, remove used kwargs:
            self.params['dataset'].update(self.dataset.params)
            for key in self.params['dataset'].keys():
                if key in kwargs.keys():
                    _ = kwargs.pop(key)

            self.log.info(msg)

        # Connect/Start data server (and get dataset configuration and statistic):
        self.log.info('Connecting data_server...')
        self._start_data_server()
        self.log.info('...done.')

        # After starting data-server we have self.data_names attribute filled.

        # ENGINE preparation:
        # Update params -3: pull engine-related kwargs, remove used:
        for key in self.params['engine'].keys():
            if key in kwargs.keys():
                self.params['engine'][key] = kwargs.pop(key)

        if self.engine is not None:
            # If full-blown bt.Cerebro() subclass has been passed:
            # Update info:
            msg = 'Custom Cerebro class used.'
            self.strategy = msg
            for key in self.params['engine'].keys():
                self.params['engine'][key] = msg

        # Note: either way, bt.observers.DrawDown observer [and logger] will be added to any BTgymStrategy instance
        # by BTgymServer process at runtime.

        else:
            # Default configuration for Backtrader computational engine (Cerebro),
            # if no bt.Cerebro() custom subclass has been passed,
            # get base class Cerebro(), using kwargs on top of defaults:
            self.engine = bt.Cerebro()
            msg = 'Base Cerebro class used.'

            # First, set STRATEGY configuration:
            if self.strategy is not None:
                # If custom strategy has been passed:
                msg2 = 'Custom Strategy class used.'

            else:
                # Base class strategy :
                self.strategy = BTgymBaseStrategy
                msg2 = 'Base Strategy class used.'

            # Add, using kwargs on top of defaults:
            #self.log.debug('kwargs for strategy: {}'.format(kwargs))
            strat_idx = self.engine.addstrategy(self.strategy, **kwargs)

            msg += ' ' + msg2

            # Second, set Cerebro-level configuration:
            self.engine.broker.setcash(self.params['engine']['start_cash'])
            self.engine.broker.setcommission(
                self.params['engine']['broker_commission'])
            self.engine.addsizer(bt.sizers.SizerFix,
                                 stake=self.params['engine']['fixed_stake'])

        self.log.info(msg)

        # Define observation space shape, minimum / maximum values and agent action space.
        # Retrieve values from configured engine or...

        # ...Update params -4:
        # Pull strategy defaults to environment params dict :
        for t_key, t_value in self.engine.strats[0][0][0].params._gettuple():
            self.params['strategy'][t_key] = t_value

        # Update it with values from strategy 'passed-to params':
        for key, value in self.engine.strats[0][0][2].items():
            self.params['strategy'][key] = value

        self.asset_names = self.params['strategy']['asset_names']
        self.server_actions = {
            name: self.params['strategy']['portfolio_actions']
            for name in self.asset_names
        }
        self.cash_name = self.params['strategy']['cash_name']

        self.params['strategy']['initial_action'] = self.get_initial_action()
        self.params['strategy'][
            'initial_portfolio_action'] = self.get_initial_portfolio_action()

        # Only single asset is supported by base class:
        try:
            assert len(list(self.asset_names)) == 1

        except AssertionError:
            self.log.error(
                'Using multiple assets with base BTgymEnv class is not supported. Use designated class.'
            )
            raise ValueError

        try:
            assert set(self.asset_names).issubset(set(self.data_lines_names))

        except AssertionError:
            msg = 'Assets names should be subset of data_lines names, but got: assets: {}, data_lines: {}'.format(
                set(self.asset_names), set(self.data_lines_names))
            self.log.error(msg)
            raise ValueError(msg)

        # ... Push it all back (don't ask):
        for key, value in self.params['strategy'].items():
            self.engine.strats[0][0][2][key] = value

        # For 'raw_state' min/max values,
        # the only way is to infer from raw Dataset price values (we already got those from data_server):
        if 'raw' in self.params['strategy']['state_shape'].keys():
            # Exclude 'volume' from columns we count:
            self.dataset_columns.remove('volume')

            #print(self.params['strategy'])
            #print('self.engine.strats[0][0][2]:', self.engine.strats[0][0][2])
            #print('self.engine.strats[0][0][0].params:', self.engine.strats[0][0][0].params._gettuple())

            # Override with absolute price min and max values:
            self.params['strategy']['state_shape']['raw'].low =\
                self.engine.strats[0][0][2]['state_shape']['raw'].low =\
                np.zeros(self.params['strategy']['state_shape']['raw'].shape) +\
                self.dataset_stat.loc['min', self.dataset_columns].min()

            self.params['strategy']['state_shape']['raw'].high = \
                self.engine.strats[0][0][2]['state_shape']['raw'].high = \
                np.zeros(self.params['strategy']['state_shape']['raw'].shape) + \
                self.dataset_stat.loc['max', self.dataset_columns].max()

            self.log.info(
                'Inferring `state[raw]` high/low values from dataset: {:.6f} / {:.6f}.'
                .format(
                    self.dataset_stat.loc['min', self.dataset_columns].min(),
                    self.dataset_stat.loc['max', self.dataset_columns].max()))

        # Set observation space shape from engine/strategy parameters:
        self.observation_space = DictSpace(
            self.params['strategy']['state_shape'])

        self.log.debug('Obs. shape: {}'.format(self.observation_space.spaces))
        #self.log.debug('Obs. min:\n{}\nmax:\n{}'.format(self.observation_space.low, self.observation_space.high))

        # Set action space (one-key dict for this class) and corresponding server messages:
        self.action_space = ActionDictSpace(
            base_actions=self.params['strategy']['portfolio_actions'],
            assets=self.asset_names)

        # Finally:
        self.server_response = None
        self.env_response = None

        self._start_server()
        self.closed = False

        self.log.info('Environment is ready.')

    def _seed(self, seed=None):
        """
        Sets env. random seed.

        Args:
            seed:   int or None
        """
        np.random.seed(seed)

    @staticmethod
    def _comm_with_timeout(
        socket,
        message,
    ):
        """
        Exchanges messages via socket, timeout sensitive.

        Args:
            socket: zmq connected socket to communicate via;
            message: message to send;

        Note:
            socket zmq.RCVTIMEO and zmq.SNDTIMEO should be set to some finite number of milliseconds.

        Returns:
            dictionary:
                `status`: communication result;
                `message`: received message if status == `ok` or None;
                `time`: remote side response time.
        """
        response = dict(
            status='ok',
            message=None,
        )
        try:
            socket.send_pyobj(message)

        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                response['status'] = 'send_failed_due_to_connect_timeout'

            else:
                response['status'] = 'send_failed_for_unknown_reason'
            return response

        start = time.time()
        try:
            response['message'] = socket.recv_pyobj()
            response['time'] = time.time() - start

        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                response['status'] = 'receive_failed_due_to_connect_timeout'

            else:
                response['status'] = 'receive_failed_for_unknown_reason'
            return response

        return response

    def _start_server(self):
        """
        Configures backtrader REQ/REP server instance and starts server process.
        """

        # Ensure network resources:
        # 1. Release client-side, if any:
        if self.context:
            self.context.destroy()
            self.socket = None

        # 2. Kill any process using server port:
        cmd = "kill $( lsof -i:{} -t ) > /dev/null 2>&1".format(self.port)
        os.system(cmd)

        # Set up client channel:
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REQ)
        self.socket.setsockopt(zmq.RCVTIMEO, self.connect_timeout * 1000)
        self.socket.setsockopt(zmq.SNDTIMEO, self.connect_timeout * 1000)
        self.socket.connect(self.network_address)

        # Configure and start server:
        self.server = BTgymServer(
            cerebro=self.engine,
            render=self.renderer,
            network_address=self.network_address,
            data_network_address=self.data_network_address,
            connect_timeout=self.connect_timeout,
            log_level=self.log_level,
            task=self.task,
        )
        self.server.daemon = False
        self.server.start()
        # Wait for server to startup:
        time.sleep(1)

        # Check connection:
        self.log.info('Server started, pinging {} ...'.format(
            self.network_address))

        self.server_response = self._comm_with_timeout(
            socket=self.socket, message={'ctrl': 'ping!'})
        if self.server_response['status'] in 'ok':
            self.log.info('Server seems ready with response: <{}>'.format(
                self.server_response['message']))

        else:
            msg = 'Server unreachable with status: <{}>.'.format(
                self.server_response['status'])
            self.log.error(msg)
            raise ConnectionError(msg)

        self._closed = False

    def _stop_server(self):
        """
        Stops BT server process, releases network resources.
        """
        if self.server:

            if self._force_control_mode():
                # In case server is running and client side is ok:
                self.socket.send_pyobj({'ctrl': '_stop'})
                self.server_response = self.socket.recv_pyobj()

            else:
                self.server.terminate()
                self.server.join()
                self.server_response = 'Server process terminated.'

            self.log.info('{} Exit code: {}'.format(self.server_response,
                                                    self.server.exitcode))

        # Release client-side, if any:
        if self.context:
            self.context.destroy()
            self.socket = None

    def _force_control_mode(self):
        """Puts BT server to control mode.
        """
        # Check is there any faults with server process and connection?
        network_error = [
            (not self.server or not self.server.is_alive(),
             'No running server found. Hint: forgot to call reset()?'),
            (not self.context
             or self.context.closed, 'No network connection found.'),
        ]
        for (err, msg) in network_error:
            if err:
                self.log.info(msg)
                self.server_response = msg
                return False

        # If everything works, insist to go 'control':
        self.server_response = {}
        attempt = 0

        while 'ctrl' not in self.server_response:
            self.socket.send_pyobj({'ctrl': '_done'})
            self.server_response = self.socket.recv_pyobj()
            attempt += 1
            self.log.debug(
                'FORCE CONTROL MODE attempt: {}.\nResponse: {}'.format(
                    attempt, self.server_response))

        return True

    def _assert_response(self, response):
        """
        Simple watcher:
        roughly checks if we are really talking to the environment (i.e. an episode is running).
        Raises an exception if the response given is not as expected.
        """
        try:
            assert type(response) == tuple and len(response) == 4

        except AssertionError:
            msg = 'Unexpected environment response: {}\nHint: Forgot to call reset() or reset_data()?'.format(
                response)
            self.log.exception(msg)
            raise AssertionError(msg)

        self.log.debug('Response checker received:\n{}\nas type: {}'.format(
            response, type(response)))

    def _print_space(self, space, _tab=''):
        """
        Parses observation space shape or response.

        Args:
            space: gym observation space or state.

        Returns:
            description as string.
        """
        response = ''
        if type(space) in [dict, OrderedDict]:
            for key, value in space.items():
                response += '\n{}{}:{}\n'.format(
                    _tab, key, self._print_space(value, '   '))

        elif type(space) in [spaces.Dict, DictSpace]:
            for s in space.spaces:
                response += self._print_space(s, '   ')

        elif type(space) in [tuple, list]:
            for i in space:
                response += self._print_space(i, '   ')

        elif type(space) == np.ndarray:
            response += '\n{}array of shape: {}, low: {}, high: {}'.format(
                _tab, space.shape, space.min(), space.max())

        else:
            response += '\n{}{}, '.format(_tab, space)
            try:
                response += 'low: {}, high: {}'.format(space.low.min(),
                                                       space.high.max())

            except (KeyError, AttributeError, ArithmeticError,
                    ValueError) as e:
                pass
                #response += '\n{}'.format(e)

        return response

    def get_initial_action(self):
        return {asset: 0 for asset in self.asset_names}

    def get_initial_portfolio_action(self):
        return {
            asset: actions[0]
            for asset, actions in self.server_actions.items()
        }

    def reset(self, **kwargs):
        """
        Implementation of OpenAI Gym env.reset method. Starts new episode. Episode data are sampled
        according to data provider class logic, controlled via kwargs. Refer `BTgym_Server` and data provider
        classes for details.

        Args:
            kwargs:         any kwargs; this dictionary is passed through to BTgym_server side without any checks and
                            modifications; currently used for data sampling control;

        Returns:
            observation space state

        Notes:
            Current kwargs accepted is::


                episode_config=dict(
                    get_new=True,
                    sample_type=0,
                    b_alpha=1,
                    b_beta=1
                ),
                trial_config=dict(
                    get_new=True,
                    sample_type=0,
                    b_alpha=1,
                    b_beta=1
                )

        """
        # Data Server check:
        if self.data_master:
            if not self.data_server or not self.data_server.is_alive():
                self.log.info('No running data_server found, starting...')
                self._start_data_server()

            # Domain dataset status check:
            self.data_server_response = self._comm_with_timeout(
                socket=self.data_socket, message={'ctrl': '_get_info'})
            if not self.data_server_response['message']['dataset_is_ready']:
                self.log.info(
                    'Data domain `reset()` called prior to `reset_data()` with [possibly inconsistent] defaults.'
                )
                self.reset_data()

        # Server process check:
        if not self.server or not self.server.is_alive():
            self.log.info('No running server found, starting...')
            self._start_server()

        if self._force_control_mode():
            self.server_response = self._comm_with_timeout(socket=self.socket,
                                                           message={
                                                               'ctrl':
                                                               '_reset',
                                                               'kwargs': kwargs
                                                           })
            # Get initial environment response:
            self.env_response = self.step(self.get_initial_action())

            # Check (once) if it is really (o,r,d,i) tuple:
            self._assert_response(self.env_response)

            # Check (once) if state_space is as expected:
            try:
                assert self.observation_space.contains(self.env_response[0])

            except (AssertionError, AttributeError) as e:
                msg1 = self._print_space(self.observation_space.spaces)
                msg2 = self._print_space(self.env_response[0])
                msg3 = ''
                for step_info in self.env_response[-1]:
                    msg3 += '{}\n'.format(step_info)
                msg = ('\nState observation shape/range mismatch!\n' +
                       'Space set by env: \n{}\n' +
                       'Space returned by server: \n{}\n' +
                       'Full response:\n{}\n' + 'Reward: {}\n' + 'Done: {}\n' +
                       'Info:\n{}\n' +
                       'Hint: Wrong Strategy.get_state() parameters?').format(
                           msg1,
                           msg2,
                           self.env_response[0],
                           self.env_response[1],
                           self.env_response[2],
                           msg3,
                       )
                self.log.exception(msg)
                self._stop_server()
                raise AssertionError(msg)

            return self.env_response[0]

        else:
            msg = 'Something went wrong. env.reset() can not get response from server.'
            self.log.exception(msg)
            raise ChildProcessError(msg)

    def step(self, action):
        """
        Implementation of OpenAI Gym env.step() method.
        Makes a step in the environment.

        Args:
            action:     int or dict, action compatible to env.action_space

        Returns:
            tuple (Observation, Reward, Done, Info)

        """
        # If we got int as action - try to treat it as an action for single-valued action space dict:
        if isinstance(action, int) and len(
                list(self.action_space.spaces.keys())) == 1:
            a = copy.deepcopy(action)
            action = {key: a for key in self.action_space.spaces.keys()}

        # Are you in the list, ready to go and all that?
        if self.action_space.contains(action)\
            and not self._closed\
            and (self.socket is not None)\
            and not self.socket.closed:
            pass

        else:
            msg = ('\nAt least one of these is true:\n' +
                   'Action error: (space is {}, action sent is {}): {}\n' +
                   'Environment closed: {}\n' +
                   'Network error [socket doesnt exists or closed]: {}\n' +
                   'Hint: forgot to call reset()?').format(
                       self.action_space,
                       action,
                       not self.action_space.contains(action),
                       self._closed,
                       not self.socket or self.socket.closed,
                   )
            self.log.exception(msg)
            raise AssertionError(msg)

        # Send action (as dict of strings) to backtrader engine, receive environment response:
        action_as_dict = {
            key: self.server_actions[key][value]
            for key, value in action.items()
        }
        #print('step: ', action, action_as_dict)
        env_response = self._comm_with_timeout(
            socket=self.socket, message={'action': action_as_dict})
        if not env_response['status'] in 'ok':
            msg = '.step(): server unreachable with status: <{}>.'.format(
                env_response['status'])
            self.log.error(msg)
            raise ConnectionError(msg)

        self.env_response = env_response['message']

        return self.env_response

    def close(self):
        """
        Implementation of OpenAI Gym env.close method.
        Puts BTgym server in Control Mode.
        """
        self.log.debug('close.call()')
        self._stop_server()
        self._stop_data_server()
        self.log.info('Environment closed.')

    def get_stat(self):
        """
        Returns last run episode statistics.

        Note:
            when invoked, forces running episode to terminate.
        """
        if self._force_control_mode():
            self.socket.send_pyobj({'ctrl': '_getstat'})
            return self.socket.recv_pyobj()

        else:
            return self.server_response

    def render(self, mode='other_mode', close=False):
        """
        Implementation of OpenAI Gym env.render method.
        Visualises current environment state.

        Args:
            `mode`:     str, any of these::

                            `human` - current state observation as price lines;
                            `episode` - plotted results of last completed episode.
                            [other_key] - corresponding to any custom observation space key
        """
        if close:
            return None

        if not self._closed\
            and self.socket\
            and not self.socket.closed:
            pass

        else:
            msg = ('\nCan\'t get renderings.'
                   '\nAt least one of these is true:\n' +
                   'Environment closed: {}\n' +
                   'Network error [socket doesnt exists or closed]: {}\n' +
                   'Hint: forgot to call reset()?').format(
                       self._closed,
                       not self.socket or self.socket.closed,
                   )
            self.log.warning(msg)
            return None
        if mode not in self.render_modes:
            raise ValueError('Unexpected render mode {}'.format(mode))
        self.socket.send_pyobj({'ctrl': '_render', 'mode': mode})

        rgb_array_dict = self.socket.recv_pyobj()

        self.rendered_rgb.update(rgb_array_dict)

        return self.rendered_rgb[mode]

    def _stop(self):
        """
        Finishes current episode if any, does nothing otherwise. Leaves server running.
        """
        if self._force_control_mode():
            self.log.info('Episode stop forced.')

    def _restart_server(self):
        """Restarts server.
        """
        self._stop_server()
        self._start_server()
        self.log.info('Server restarted.')

    def _start_data_server(self):
        """
        For data_master environment:
            - configures backtrader REQ/REP server instance and starts server process.

        For others:
            - establishes network connection to existing data_server.
        """
        self.data_server = None

        # Ensure network resources:
        # 1. Release client-side, if any:
        if self.data_context:
            self.data_context.destroy()
            self.data_socket = None

        # Only data_master launches/stops data_server process:
        if self.data_master:
            # 2. Kill any process using server port:
            cmd = "kill $( lsof -i:{} -t ) > /dev/null 2>&1".format(
                self.data_port)
            os.system(cmd)

            # Configure and start server:
            self.data_server = BTgymDataFeedServer(
                dataset=self.dataset,
                network_address=self.data_network_address,
                log_level=self.log_level,
                task=self.task)
            self.data_server.daemon = False
            self.data_server.start()
            # Wait for server to startup
            time.sleep(1)

        # Set up client channel:
        self.data_context = zmq.Context()
        self.data_socket = self.data_context.socket(zmq.REQ)
        self.data_socket.setsockopt(zmq.RCVTIMEO, self.connect_timeout * 1000)
        self.data_socket.setsockopt(zmq.SNDTIMEO, self.connect_timeout * 1000)
        self.data_socket.connect(self.data_network_address)

        # Check connection:
        self.log.debug('Pinging data_server at: {} ...'.format(
            self.data_network_address))

        self.data_server_response = self._comm_with_timeout(
            socket=self.data_socket, message={'ctrl': 'ping!'})
        if self.data_server_response['status'] in 'ok':
            self.log.debug(
                'Data_server seems ready with response: <{}>'.format(
                    self.data_server_response['message']))

        else:
            msg = 'Data_server unreachable with status: <{}>.'.\
                format(self.data_server_response['status'])
            self.log.error(msg)
            raise ConnectionError(msg)

        # Get info and statistic:
        self.dataset_stat, self.dataset_columns, self.data_server_pid, self.data_lines_names = self._get_dataset_info(
        )

    def _stop_data_server(self):
        """
        For data_master:
            - stops BT server process, releases network resources.
        """
        if self.data_master:
            if self.data_server is not None and self.data_server.is_alive():
                # In case server is running and is ok:
                self.data_socket.send_pyobj({'ctrl': '_stop'})
                self.data_server_response = self.data_socket.recv_pyobj()

            else:
                self.data_server.terminate()
                self.data_server.join()
                self.data_server_response = 'Data_server process terminated.'

            self.log.info('{} Exit code: {}'.format(self.data_server_response,
                                                    self.data_server.exitcode))

        if self.data_context:
            self.data_context.destroy()
            self.data_socket = None

    def _restart_data_server(self):
        """
        Restarts data_server.
        """
        if self.data_master:
            self._stop_data_server()
            self._start_data_server()

    def _get_dataset_info(self):
        """
        Retrieves dataset configuration and descriptive statistic.
        """
        self.data_socket.send_pyobj({'ctrl': '_get_info'})
        self.data_server_response = self.data_socket.recv_pyobj()

        return self.data_server_response['dataset_stat'],\
            self.data_server_response['dataset_columns'],\
            self.data_server_response['pid'], \
            self.data_server_response['data_names']

    def reset_data(self, **kwargs):
        """
        Resets data provider class used, whatever it means for that class. Gets data_server ready to provide data.
        Supposed to be called before first env.reset().

        Note:
            when invoked, forces running episode to terminate.

        Args:
            **kwargs:   data provider class .reset() method specific.
        """
        if self.closed:
            self._start_server()
            if self.data_master:
                self._start_data_server()
            self.closed = False

        else:
            _ = self._force_control_mode()

        if self.data_master:
            if self.data_server is None or not self.data_server.is_alive():
                self._restart_data_server()

            self.data_server_response = self._comm_with_timeout(
                socket=self.data_socket,
                message={
                    'ctrl': '_reset_data',
                    'kwargs': kwargs
                })
            if self.data_server_response['status'] == 'ok':
                self.log.debug(
                    'Dataset seems ready with response: <{}>'.format(
                        self.data_server_response['message']))

            else:
                msg = 'Data_server unreachable with status: <{}>.'. \
                    format(self.data_server_response['status'])
                self.log.error(msg)
                raise SystemExit(msg)

        else:
            pass
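
A minimal call sketch for the method above (assuming an already constructed env instance; illustrative usage only, not part of the snippet):

env.reset_data()           # forwards **kwargs to the data provider's reset() on the data_server
observation = env.reset()  # then start the first episode as usual
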
Example #21
File: strategy_4.py Project: kazi308/btgym
class DevStrat_4_9(DevStrat_4_7):
    """
    Uses hard-coded market state features.
    """
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay, but somehow undiscounted works better <- wtf?!

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100, high=100, shape=(time_dim, 1, 8)),
            'internal':
            spaces.Box(low=-2, high=2, shape=(1, 1, 5)),
            'metadata':
            DictSpace({
                'type': spaces.Box(shape=(), low=0, high=1),
                'trial_num': spaces.Box(shape=(), low=0, high=10**10),
                'sample_num': spaces.Box(shape=(), low=0, high=10**10),
                'first_row': spaces.Box(shape=(), low=0, high=10**10)
            })
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        gamma=gamma,
        metadata={},
    )

    def set_datalines(self):
        self.data.sma_4 = btind.SimpleMovingAverage(self.datas[0], period=4)
        self.data.sma_8 = btind.SimpleMovingAverage(self.datas[0], period=8)
        self.data.sma_16 = btind.SimpleMovingAverage(self.datas[0], period=16)
        self.data.sma_32 = btind.SimpleMovingAverage(self.datas[0], period=32)
        self.data.sma_64 = btind.SimpleMovingAverage(self.datas[0], period=64)
        self.data.sma_128 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=128)
        self.data.sma_256 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=256)

        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0],
                                                      period=(256 +
                                                              self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_market_state(self):
        T = 1e4  # EURUSD
        # T = 1 # BTCUSD

        x = np.stack([
            np.frombuffer(self.data.open.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_4.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_8.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_16.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_32.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_64.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_128.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_256.get(size=self.time_dim)),
        ],
                     axis=-1)
        # Gradient along features axis:
        x = np.gradient(x, axis=1) * T

        # Log-scale:
        x = log_transform(x)
        return x[:, None, :]

    def get_state(self):
        # Update inner state statistic and compose state:
        self.update_sliding_stat()

        self.state['external'] = self.get_market_state()
        self.state['internal'] = self.get_broker_state()

        return self.state
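
Note: get_market_state() above calls a log_transform helper that is not defined in this snippet. A minimal sketch of a signed log-scaling function with that name (an assumption about its behaviour, not necessarily the project's actual implementation):

import numpy as np

def log_transform(x):
    # Hypothetical helper: compress large-magnitude values while preserving sign.
    return np.sign(x) * np.log1p(np.abs(x))
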
Example #22
class DevStrat_4_11(DevStrat_4_10):
    """
    4_10 + another set of SMA features; gradients for broker state.
    """
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay, but somehow undiscounted works better <- wtf?!

    reward_scale = 1  # reward multiplicator

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape=
        {
            'external': spaces.Box(low=-100, high=100, shape=(time_dim, 1, 5)),
            'internal': spaces.Box(low=-2, high=2, shape=(avg_period, 1, 5)),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1
                    ),
                    'trial_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10
                    ),
                    'sample_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10
                    ),
                    'first_row': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10
                    )
                }
            )
        },
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=reward_scale,
        metadata={},
    )

    def set_datalines(self):
        self.data.sma_16 = btind.SimpleMovingAverage(self.datas[0], period=16)
        self.data.sma_32 = btind.SimpleMovingAverage(self.datas[0], period=32)
        self.data.sma_64 = btind.SimpleMovingAverage(self.datas[0], period=64)
        self.data.sma_128 = btind.SimpleMovingAverage(self.datas[0], period=128)
        self.data.sma_256 = btind.SimpleMovingAverage(self.datas[0], period=256)

        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=(256 + self.time_dim)
        )
        self.data.dim_sma.plotinfo.plot = False

        # Define data channels:
        #self.channel_dO = bt.Sum(self.data.open, - self.data.open(-1))
        #self.channel_dH = bt.Sum(self.data.high, - self.data.high(-1))
        #self.channel_dL = bt.Sum(self.data.low, - self.data.low(-1))

    def get_market_state(self):
        T = 2e3  # EURUSD
        T2 = 2e3

        if False:
            x_p = np.stack(
                [
                    #np.frombuffer(self.channel_dO.get(size=self.time_dim)),
                    #np.frombuffer(self.channel_dH.get(size=self.time_dim)),
                    #np.frombuffer(self.channel_dL.get(size=self.time_dim)),

                    np.frombuffer(self.data.open.get(size=self.time_dim)),
                    np.frombuffer(self.data.high.get(size=self.time_dim)),
                    np.frombuffer(self.data.low.get(size=self.time_dim)),
                ],
                axis=-1
            )
            x_p = np.gradient(x_p, axis=0)
            x_p = tanh(x_p * T)

        x_sma = np.stack(
            [
                np.frombuffer(self.data.sma_16.get(size=self.time_dim)),
                np.frombuffer(self.data.sma_32.get(size=self.time_dim)),
                np.frombuffer(self.data.sma_64.get(size=self.time_dim)),
                np.frombuffer(self.data.sma_128.get(size=self.time_dim)),
                np.frombuffer(self.data.sma_256.get(size=self.time_dim)),
            ],
            axis=-1
        )
        # Gradient along features axis:
        x_sma = np.gradient(x_sma, axis=-1) * T2

        # Log-scale:
        #x_sma = log_transform(x_sma)
        x_sma = tanh(x_sma)

        #x = np.concatenate([x_p, x_sma], axis=-1)
        x = x_sma
        #x = x_p

        return x[:, None, :]

    def get_broker_state(self):
        T_b = 1
        x_broker = np.concatenate(
            [
                np.asarray(self.sliding_stat['broker_value'])[..., None],
                np.asarray(self.sliding_stat['unrealized_pnl'])[..., None],
                np.asarray(self.sliding_stat['realized_pnl'])[..., None],
                np.asarray(self.sliding_stat['broker_cash'])[..., None],
                np.asarray(self.sliding_stat['exposure'])[..., None],
            ],
            axis=-1
        )
        x_broker = tanh(np.gradient(x_broker, axis=-1) * T_b)
        return x_broker[:, None, :]
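
To illustrate the external state transform above, here is a standalone numpy sketch (dummy random values stand in for the backtrader SMA lines; the shapes and the T2 constant follow the strategy above):

import numpy as np

time_dim, T2 = 30, 2e3
x_sma = 1.1 + 1e-4 * np.random.randn(time_dim, 5)   # fake EURUSD-scale SMA stack
x = np.tanh(np.gradient(x_sma, axis=-1) * T2)       # gradient along features axis, squashed into (-1, 1)
state = x[:, None, :]                               # add fake `width` dimension -> (30, 1, 5)
assert state.shape == (time_dim, 1, 5)
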
Example #23
class DevStrat_4_6(BTgymBaseStrategy):
    """
    Objectives:
        external state data feature search:
            time_embedded three-channeled vector:
                - `Open` channel is one time-step difference of Open price;
                - `High` and `Low` channels are differences
                  between current Open price and current High or Low prices respectively

        internal state data feature search:
            time_embedded concatenated vector of broker and portfolio statistics
            time_embedded vector of last actions received (one-hot)
            time_embedded vector of rewards

        reward shaping search:
           potential-based shaping functions

    Data:
        synthetic/real
    """

    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = time_dim

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-1,
                       high=1,
                       shape=(time_dim, 1, 3),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        state_ext_scale=2e3,  # EURUSD
        state_int_scale=1.0,  # not used
        metadata={})

    def __init__(self, **kwargs):
        """

        Args:
            **kwargs:   see BTgymBaseStrategy args.
        """
        super(DevStrat_4_6, self).__init__(**kwargs)
        self.state['metadata'] = self.metadata

        self.log.debug('DEV_state_shape: {}'.format(self.p.state_shape))
        self.log.debug('DEV_skip_frame: {}'.format(self.p.skip_frame))
        self.log.debug('DEV_portfolio_actions: {}'.format(
            self.p.portfolio_actions))
        self.log.debug('DEV_drawdown_call: {}'.format(self.p.drawdown_call))
        self.log.debug('DEV_target_call: {}'.format(self.p.target_call))
        self.log.debug('DEV_dataset_stat:\n{}'.format(self.p.dataset_stat))
        self.log.debug('DEV_episode_stat:\n{}'.format(self.p.episode_stat))

    def set_datalines(self):

        # Define data channels:
        self.channel_O = bt.Sum(self.data.open, -self.data.open(-1))
        self.channel_H = bt.Sum(self.data.high, -self.data.open)
        self.channel_L = bt.Sum(self.data.low, -self.data.open)

    def get_external_state(self):

        x = np.stack([
            np.frombuffer(self.channel_O.get(size=self.time_dim)),
            np.frombuffer(self.channel_H.get(size=self.time_dim)),
            np.frombuffer(self.channel_L.get(size=self.time_dim)),
        ],
                     axis=-1)
        # Amplify and squash in [-1,1], seems to be best option as of 4.10.17:
        # `self.p.state_ext_scale` param is supposed to keep most of the signal
        # in 'linear' part of tanh while squashing spikes.
        x_market = tanh(x * self.p.state_ext_scale)

        return x_market[:, None, :]
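
The three channels defined in set_datalines() above are a one-step difference of Open and the High/Low offsets from the current Open. A plain-numpy sketch of the same construction (synthetic arrays stand in for the backtrader data lines):

import numpy as np

open_, high, low = 1.1 + 0.01 * np.random.rand(3, 31)
channel_O = open_[1:] - open_[:-1]   # Open[t] - Open[t-1]
channel_H = high[1:] - open_[1:]     # High[t] - Open[t]
channel_L = low[1:] - open_[1:]      # Low[t]  - Open[t]
x = np.stack([channel_O, channel_H, channel_L], axis=-1)
x_market = np.tanh(x * 2e3)          # state_ext_scale=2e3 keeps typical moves in the near-linear part of tanh
print(x_market.shape)                # (30, 3), before the fake width dimension is added
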
Example #24
class BTgymEnv(gym.Env):
    """OpenAI Gym environment wrapper for Backtrader backtesting/trading library.
    """
    # Datafeed Server management:
    data_master = True
    data_network_address = 'tcp://127.0.0.1:'  # using localhost.
    data_port = 4999
    data_server = None
    data_server_pid = None
    data_context = None
    data_socket = None
    data_server_response = None

    # Dataset:
    dataset = None  # BTgymDataset instance.
    dataset_stat = None

    # Backtrader engine:
    engine = None  # bt.Cerebro subclass for server to execute.

    # Strategy:
    strategy = None  # strategy to use if no <engine> class been passed.

    # Server and network:
    server = None  # Server process.
    context = None  # ZMQ context.
    socket = None  # ZMQ socket, client side.
    port = 5500  # network port to use.
    network_address = 'tcp://127.0.0.1:'  # using localhost.
    ctrl_actions = ('_done', '_reset', '_stop', '_getstat', '_render'
                    )  # server control messages.
    server_response = None

    # Connection timeout:
    connect_timeout = 60  # server connection timeout in seconds.
    #connect_timeout_step = 0.01  # time between retries in seconds.

    # Rendering:
    render_enabled = True
    render_modes = [
        'human',
        'episode',
    ]
    # `episode` - plotted episode results.
    # `human` - raw_state observation in conventional human-readable format.
    #  <obs_space_key> - rendering of arbitrary state presented in observation_space with same key.

    renderer = None  # Rendering support.
    rendered_rgb = dict()  # Keep last rendered images for each mode.

    # Logging:
    log = None
    verbose = 0  # verbosity mode: 0 - silent, 1 - info, 2 - debugging level (lot of traffic!).

    closed = True

    def __init__(self, *args, **kwargs):
        """
        Environment kwargs applying logic::

            if <engine> kwarg is given:
                do not use default engine and strategy parameters;
                ignore <strategy> kwarg and all strategy and engine-related kwargs.

            else (no <engine>):
                use default engine parameters;
                if any engine-related kwarg is given:
                    override corresponding default parameter;

                if <strategy> is given:
                    do not use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

                else (no <strategy>):
                    use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

            if <dataset> kwarg is given:
                do not use default dataset parameters;
                ignore dataset related kwargs;

            else (no <dataset>):
                use default dataset parameters;
                    if  any dataset related kwarg is given:
                        override corresponding dataset parameter;

            If any <other> kwarg is given:
                override corresponding default parameter.
        """

        #print("start backtrader")

        # Parameters and default values:
        self.params = dict(

            # Backtrader engine mandatory parameters:
            engine=dict(
                start_cash=10.0,  # initial trading capital.
                broker_commission=
                0.001,  # trade execution commission, default is 0.1% of operation value.
                fixed_stake=10,  # single trade stake is fixed type by def.
            ),
            # Dataset mandatory parameters:
            dataset=dict(filename=None, ),
            strategy=dict(state_shape=dict(), ),
            render=dict(),
        )
        p2 = dict(
            # Strategy related parameters:
            # Observation state shape is dictionary of Gym spaces,
            # at least should contain `raw_state` field.
            # By convention first dimension of every Gym Box space is time embedding one;
            # one can define any shape; should match env.observation_space.shape.
            # observation space state min/max values,
            # For `raw_state`, absolute min/max values from BTgymDataset will be used.
            state_shape=dict(raw_state=spaces.Box(
                shape=(10, 4),
                low=-100,
                high=100,
            )),
            drawdown_call=
            None,  # episode maximum drawdown threshold, default is 90% of initial value.
            portfolio_actions=None,
            # agent actions,
            # should consist with BTgymStrategy order execution logic;
            # defaults are: 0 - 'do nothing', 1 - 'buy', 2 - 'sell', 3 - 'close position'.
            skip_frame=None,
            # Number of environment steps to skip before returning next response,
            # e.g. if set to 10 -- agent will interact with environment every 10th episode step;
            # Every other step agent's action is assumed to be 'hold'.
            # Note: INFO part of environment response is a list of all skipped frame's info's,
            #       i.e. [info[-9], info[-8], ..., info[0]].
        )

        # Update self attributes, remove used kwargs:
        for key in dir(self):
            if key in kwargs.keys():
                setattr(self, key, kwargs.pop(key))

        self.metadata = {'render.modes': self.render_modes}

        # Verbosity control:
        if True:  #self.log is None:
            self.log = logging.getLogger('Env')
            log_levels = [
                (0, 'WARNING'),
                (1, 'INFO'),
                (2, 'DEBUG'),
            ]
            for key, level in log_levels:
                if key == self.verbose:
                    self.log.setLevel(level)

        # Network parameters:
        self.network_address += str(self.port)
        self.data_network_address += str(self.data_port)

        # Set server rendering:
        if self.render_enabled:
            self.renderer = BTgymRendering(self.metadata['render.modes'],
                                           **kwargs)

        else:
            self.renderer = BTgymNullRendering()
            self.log.info(
                'Rendering disabled. Call to render() will return null-plug image.'
            )

        # Append logging:
        self.renderer.log = self.log

        # Update params -1: pull from renderer, remove used kwargs:
        self.params['render'].update(self.renderer.params)
        for key in self.params['render'].keys():
            if key in kwargs.keys():
                _ = kwargs.pop(key)

        if self.data_master:
            # DATASET preparation, only data_master executes this:
            #
            if self.dataset is not None:
                # If BTgymDataset instance has been passed:
                # do nothing.
                msg = 'Custom Dataset class used.'

            else:
                # If no BTgymDataset has been passed,
                # Make default dataset with given CSV file:
                try:
                    os.path.isfile(str(self.params['dataset']['filename']))

                except:
                    raise FileNotFoundError(
                        'Dataset source data file not specified/not found')

                # Use kwargs to instantiate dataset:
                self.dataset = BTgymDataset(**kwargs)
                msg = 'Base Dataset class used.'

            # Append logging:
            self.dataset.log = self.log

            # Update params -2: pull from dataset, remove used kwargs:
            self.params['dataset'].update(self.dataset.params)
            for key in self.params['dataset'].keys():
                if key in kwargs.keys():
                    _ = kwargs.pop(key)

            self.log.info(msg)

        # Connect/Start data server (and get dataset statistic):
        self.log.info('Connecting data_server...')
        self._start_data_server()
        self.log.info('...done.')
        # ENGINE preparation:

        # Update params -3: pull engine-related kwargs, remove used:
        for key in self.params['engine'].keys():
            if key in kwargs.keys():
                self.params['engine'][key] = kwargs.pop(key)

        if self.engine is not None:
            # If full-blown bt.Cerebro() subclass has been passed:
            # Update info:
            msg = 'Custom Cerebro class used.'
            self.strategy = msg
            for key in self.params['engine'].keys():
                self.params['engine'][key] = msg

        # Note: either way, bt.observers.DrawDown observer [and logger] will be added to any BTgymStrategy instance
        # by BTgymServer process at runtime.

        else:
            # Default configuration for Backtrader computational engine (Cerebro),
            # if no bt.Cerebro() custom subclass has been passed,
            # get base class Cerebro(), using kwargs on top of defaults:
            self.engine = bt.Cerebro()
            msg = 'Base Cerebro class used.'

            # First, set STRATEGY configuration:
            if self.strategy is not None:
                # If custom strategy has been passed:
                msg2 = 'Custom Strategy class used.'

            else:
                # Base class strategy :
                self.strategy = BTgymBaseStrategy
                msg2 = 'Base Strategy class used.'

            # Add, using kwargs on top of defaults:
            strat_idx = self.engine.addstrategy(self.strategy, **kwargs)

            msg += ' ' + msg2

            # Second, set Cerebro-level configuration:
            self.engine.broker.setcash(self.params['engine']['start_cash'])
            self.engine.broker.setcommission(
                self.params['engine']['broker_commission'])
            self.engine.addsizer(bt.sizers.SizerFix,
                                 stake=self.params['engine']['fixed_stake'])

        self.log.info(msg)

        # Define observation space shape, minimum / maximum values and agent action space.
        # Retrieve values from configured engine or...

        # ...Update params -4:
        # Pull strategy defaults to environment params dict :
        for t_key, t_value in self.engine.strats[0][0][0].params._gettuple():
            self.params['strategy'][t_key] = t_value

        # Update it with values from strategy 'passed-to params':
        for key, value in self.engine.strats[0][0][2].items():
            self.params['strategy'][key] = value

        # ... Push it all back (don't ask):
        for key, value in self.params['strategy'].items():
            self.engine.strats[0][0][2][key] = value

        # For 'raw_state' min/max values,
        # the only way is to infer from raw Dataset price values (we already got those from data_server):
        if 'raw_state' in self.params['strategy']['state_shape'].keys():
            # Exclude 'volume' from columns we count:
            self.dataset_columns.remove('volume')

            #print(self.params['strategy'])
            #print('self.engine.strats[0][0][2]:', self.engine.strats[0][0][2])
            #print('self.engine.strats[0][0][0].params:', self.engine.strats[0][0][0].params._gettuple())

            # Override with absolute price min and max values:
            self.params['strategy']['state_shape']['raw_state'].low =\
                self.engine.strats[0][0][2]['state_shape']['raw_state'].low =\
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) +\
                self.dataset_stat.loc['min', self.dataset_columns].min()

            self.params['strategy']['state_shape']['raw_state'].high = \
                self.engine.strats[0][0][2]['state_shape']['raw_state'].high = \
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) + \
                self.dataset_stat.loc['max', self.dataset_columns].max()

            self.log.info(
                'Inferring `raw_state` high/low values from dataset: {:.6f} / {:.6f}.'
                .format(
                    self.dataset_stat.loc['min', self.dataset_columns].min(),
                    self.dataset_stat.loc['max', self.dataset_columns].max()))

        # Set observation space shape from engine/strategy parameters:
        self.observation_space = DictSpace(
            self.params['strategy']['state_shape'])

        self.log.debug('Obs. shape: {}'.format(self.observation_space.spaces))
        #self.log.debug('Obs. min:\n{}\nmax:\n{}'.format(self.observation_space.low, self.observation_space.high))

        # Set action space and corresponding server messages:
        self.action_space = spaces.Discrete(
            len(self.params['strategy']['portfolio_actions']))
        self.server_actions = self.params['strategy']['portfolio_actions']

        # Finally:
        self.server_response = None
        self.env_response = None

        # If instance is datamaster - it may or may not want to launch self BTgymServer (can do it later via reset);
        # else it always need to launch it:
        #if not self.data_master:
        self._start_server()
        self.closed = False

        self.log.info('Environment is ready.')

    def _seed(self, seed=None):
        """
        Sets env. random seed.

        Args:
            seed:   int or None
        """
        np.random.seed(seed)

    def _comm_with_timeout(
        self,
        socket,
        message,
    ):
        """
        Exchanges messages via socket, timeout sensitive.

        Args:
            socket: zmq connected socket to communicate via;
            message: message to send;

        Note:
            socket zmq.RCVTIMEO and zmq.SNDTIMEO should be set to some finite number of milliseconds.

        Returns:
            dictionary:
                status: communication result;
                message: received message if status == `ok` or None;
                time: remote side response time.
        """
        response = dict(
            status='ok',
            message=None,
        )
        try:
            socket.send_pyobj(message)

        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                response['status'] = 'send_failed_due_to_connect_timeout'

            else:
                response['status'] = 'send_failed_for_unknown_reason'
            return response

        start = time.time()
        try:
            response['message'] = socket.recv_pyobj()
            response['time'] = time.time() - start

        except zmq.ZMQError as e:
            if e.errno == zmq.EAGAIN:
                response['status'] = 'receive_failed_due_to_connect_timeout'

            else:
                response['status'] = 'receive_failed_for_unknown_reason'
            return response

        return response

    def _start_server(self):
        """
        Configures backtrader REQ/REP server instance and starts server process.
        """

        # Ensure network resources:
        # 1. Release client-side, if any:
        if self.context:
            self.context.destroy()
            self.socket = None

        # 2. Kill any process using server port:
        cmd = "kill $( lsof -i:{} -t ) > /dev/null 2>&1".format(self.port)
        os.system(cmd)

        # Set up client channel:
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.REQ)
        self.socket.setsockopt(zmq.RCVTIMEO, self.connect_timeout * 1000)
        self.socket.setsockopt(zmq.SNDTIMEO, self.connect_timeout * 1000)
        self.socket.connect(self.network_address)

        # Configure and start server:
        self.server = BTgymServer(
            cerebro=self.engine,
            render=self.renderer,
            network_address=self.network_address,
            data_network_address=self.data_network_address,
            connect_timeout=self.connect_timeout,
            log=self.log)
        self.server.daemon = False
        self.server.start()
        # Wait for server to startup:
        time.sleep(1)

        # Check connection:
        self.log.info('Server started, pinging {} ...'.format(
            self.network_address))

        self.server_response = self._comm_with_timeout(
            socket=self.socket, message={'ctrl': 'ping!'})
        if self.server_response['status'] == 'ok':
            self.log.debug('Server seems ready with response: <{}>'.format(
                self.server_response['message']))

        else:
            msg = 'Server unreachable with status: <{}>.'.format(
                self.server_response['status'])
            self.log.error(msg)
            raise ConnectionError(msg)

        self._closed = False

    def _stop_server(self):
        """
        Stops BT server process, releases network resources.
        """
        if self.server:

            if self._force_control_mode():
                # In case server is running and client side is ok:
                self.socket.send_pyobj({'ctrl': '_stop'})
                self.server_response = self.socket.recv_pyobj()

            else:
                self.server.terminate()
                self.server.join()
                self.server_response = 'Server process terminated.'

            self.log.info('{} Exit code: {}'.format(self.server_response,
                                                    self.server.exitcode))

        # Release client-side, if any:
        if self.context:
            self.context.destroy()

    def _force_control_mode(self):
        """Puts BT server to control mode.
        """
        # Check whether there are any faults with the server process or connection:
        network_error = [
            (not self.server or not self.server.is_alive(),
             'No running server found. Hint: forgot to call reset()?'),
            (not self.context
             or self.context.closed, 'No network connection found.'),
        ]
        for (err, msg) in network_error:
            if err:
                self.log.info(msg)
                self.server_response = msg
                return False

        # If everything works, insist to go 'control':
        self.server_response = {}
        attempt = 0

        while 'ctrl' not in self.server_response:
            self.socket.send_pyobj({'ctrl': '_done'})
            self.server_response = self.socket.recv_pyobj()
            attempt += 1
            self.log.debug(
                'FORCE CONTROL MODE attempt: {}.\nResponse: {}'.format(
                    attempt, self.server_response))

        return True

    def _assert_response(self, response):
        """
        Simple watcher:
        roughly checks if we are really talking to the environment (i.e. an episode is running).
        Raises an exception if the given response is not as expected.
        """
        if type(response) == tuple and len(response) == 4:
            pass

        else:
            msg = 'Unexpected environment response: {}\nHint: Forgot to call reset() or reset_data()?'.format(
                response)
            raise AssertionError(msg)

        self.log.debug(
            'Env response checker received:\n{}\nas type: {}'.format(
                response, type(response)))

    def _print_space(self, space, _tab=''):
        """
        TODO: make it work
        Parses observation space shape or response.

        Args:
            space: gym observation space or response.

        Returns:
            description as string.
        """
        response = ''
        if type(space) in [dict, OrderedDict]:
            for key, value in space.items():
                response += '\n{}{}:{}\n'.format(
                    _tab, key, self._print_space(value, '   '))

        elif type(space) in [spaces.Dict, DictSpace]:
            for s in space.spaces:
                response += self._print_space(s, '   ')

        elif type(space) in [tuple, list]:
            for i in space:
                response += self._print_space(i, '   ')

        elif type(space) == np.ndarray:
            response += '\n{}array of shape: {}, low: {}, high: {}'.format(
                _tab, space.shape, space.min(), space.max())

        else:
            response += '\n{}{}, '.format(_tab, space)
            try:
                response += 'low: {}, high: {}'.format(space.low.min(),
                                                       space.high.max())

            except (KeyError, AttributeError, ArithmeticError,
                    ValueError) as e:
                pass
                #response += '\n{}'.format(e)

        return response

    def _reset(
        self,
        state_only=True
    ):  # By default, returns only initial state observation (Gym convention).
        """
        Implementation of OpenAI Gym env.reset method. Starts new episode.
        """

        # Data Server check:
        if self.data_master:
            if not self.data_server or not self.data_server.is_alive():
                self.log.info('No running data_server found, starting...')
                self._start_data_server()

            # Dataset status check:
            self.data_server_response = self._comm_with_timeout(
                socket=self.data_socket, message={'ctrl': '_get_info'})
            if not self.data_server_response['message']['dataset_is_ready']:
                self.log.warning(
                    'Data_master `reset()` called prior to `reset_data()` with [possibly inconsistent] defaults.'
                )
                self.reset_data()

        # Server process check:
        if not self.server or not self.server.is_alive():
            self.log.info('No running server found, starting...')
            self._start_server()

        if self._force_control_mode():
            self.socket.send_pyobj({'ctrl': '_reset'})
            self.server_response = self.socket.recv_pyobj()

            # Get initial environment response:
            self.env_response = self._step(0)

            # Check (once) if it is really (o,r,d,i) tuple:
            self._assert_response(self.env_response)

            # Check (once) if state_space is as expected:
            try:
                assert self.observation_space.contains(self.env_response[0])

            except (AssertionError, AttributeError) as e:
                msg1 = self._print_space(self.observation_space.spaces)
                msg2 = self._print_space(self.env_response[0])
                msg3 = ''
                for step_info in self.env_response[-1]:
                    msg3 += '{}\n'.format(step_info)
                msg = ('\nState observation shape/range mismatch!\n' +
                       'Space set by env: \n{}\n' +
                       'Space returned by server: \n{}\n' +
                       'Full response:\n{}\n' + 'Reward: {}\n' + 'Done: {}\n' +
                       'Info:\n{}\n' +
                       'Hint: Wrong Strategy.get_state() parameters?').format(
                           msg1,
                           msg2,
                           self.env_response[0],
                           self.env_response[1],
                           self.env_response[2],
                           msg3,
                       )
                self.log.error(msg)
                self._stop_server()
                raise AssertionError(msg)

            if state_only:
                return self.env_response[0]
            else:
                return self.env_response

        else:
            msg = 'Something went wrong. env.reset() can not get response from server.'
            self.log.error(msg)
            raise ChildProcessError(msg)

    def _step(self, action):
        """
        Implementation of OpenAI Gym env.step method.
        Relies on remote backtrader server for actual environment dynamics computing.
        """
        # Are you in the list, ready to go and all that?
        if self.action_space.contains(action)\
            and not self._closed\
            and (self.socket is not None)\
            and not self.socket.closed:
            pass

        else:
            msg = ('\nAt least one of these is true:\n' +
                   'Action error: (space is {}, action sent is {}): {}\n' +
                   'Environment closed: {}\n' +
                   'Network error [socket doesnt exists or closed]: {}\n' +
                   'Hint: forgot to call reset()?').format(
                       self.action_space,
                       action,
                       not self.action_space.contains(action),
                       self._closed,
                       not self.socket or self.socket.closed,
                   )
            self.log.info(msg)
            raise AssertionError(msg)

        # Send action to backtrader engine, receive environment response
        env_response = self._comm_with_timeout(
            socket=self.socket,
            message={'action': self.server_actions[action]})
        if env_response['status'] != 'ok':
            msg = 'Env.step: server unreachable with status: <{}>.'.format(
                env_response['status'])
            self.log.error(msg)
            raise ConnectionError(msg)

        self.env_response = env_response['message']

        #print("STEP PERFORMED!!!!")

        return self.env_response

    def _close(self):
        """
        Implementation of OpenAI Gym env.close method.
        Puts BTgym server in Control Mode.
        """
        self._stop_server()
        self._stop_data_server()
        self.log.info('Environment closed.')

    def get_stat(self):
        """
        Returns last run episode statistics.

        Note:
            when invoked, forces running episode to terminate.
        """
        if self._force_control_mode():
            self.socket.send_pyobj({'ctrl': '_getstat'})
            return self.socket.recv_pyobj()

        else:
            return self.server_response

    def _render(self, mode='other_mode', close=False):
        """
        Implementation of OpenAI Gym env.render method.
        Visualises current environment state.

        Args:
            `mode`:     str, any of these::

                            `human` - current state observation as price lines;
                            `episode` - plotted results of last completed episode.
                            [other_key] - corresponding to any custom observation space key
        """
        if close:
            return None

        if not self._closed\
            and self.socket\
            and not self.socket.closed:
            pass

        else:
            msg = ('\nCan\'t get renderings.'
                   '\nAt least one of these is true:\n' +
                   'Environment closed: {}\n' +
                   'Network error [socket doesnt exists or closed]: {}\n' +
                   'Hint: forgot to call reset()?').format(
                       self._closed,
                       not self.socket or self.socket.closed,
                   )
            self.log.warning(msg)
            return None
        if mode not in self.render_modes:
            raise ValueError('Unexpected render mode {}'.format(mode))
        self.socket.send_pyobj({'ctrl': '_render', 'mode': mode})
        rgb_array_dict = self.socket.recv_pyobj()

        self.rendered_rgb.update(rgb_array_dict)

        return self.rendered_rgb[mode]

    def stop(self):
        """
        Finishes current episode if any, does nothing otherwise. Leaves server running.
        """
        if self._force_control_mode():
            self.log.info('Episode stop forced.')

    def _restart_server(self):
        """Restarts server.
        """
        self._stop_server()
        self._start_server()
        self.log.info('Server restarted.')

    def _start_data_server(self):
        """
        For data_master environment:
            - configures backtrader REQ/REP server instance and starts server process.

        For others:
            - establishes network connection to existing data_server.
        """
        self.data_server = None

        # Ensure network resources:
        # 1. Release client-side, if any:
        if self.data_context:
            self.data_context.destroy()
            self.data_socket = None

        # Only data_master launches/stops data_server process:
        if self.data_master:
            # 2. Kill any process using server port:
            cmd = "kill $( lsof -i:{} -t ) > /dev/null 2>&1".format(
                self.data_port)
            os.system(cmd)

            # Configure and start server:
            self.data_server = BTgymDataFeedServer(
                dataset=self.dataset,
                network_address=self.data_network_address,
                log=self.log,
            )
            self.data_server.daemon = False
            self.data_server.start()
            # Wait for server to startup
            time.sleep(1)

        # Set up client channel:
        self.data_context = zmq.Context()
        self.data_socket = self.data_context.socket(zmq.REQ)
        self.data_socket.setsockopt(zmq.RCVTIMEO, self.connect_timeout * 1000)
        self.data_socket.setsockopt(zmq.SNDTIMEO, self.connect_timeout * 1000)
        self.data_socket.connect(self.data_network_address)

        # Check connection:
        self.log.info('Pinging data_server at: {} ...'.format(
            self.data_network_address))

        self.data_server_response = self._comm_with_timeout(
            socket=self.data_socket, message={'ctrl': 'ping!'})
        if self.data_server_response['status'] == 'ok':
            self.log.debug(
                'Data_server seems ready with response: <{}>'.format(
                    self.data_server_response['message']))

        else:
            msg = 'Data_server unreachable with status: <{}>.'.\
                format(self.data_server_response['status'])
            self.log.error(msg)
            raise SystemExit(msg)

        # Get info and statistic:
        self.dataset_stat, self.dataset_columns, self.data_server_pid = self._get_dataset_info(
        )

    def _stop_data_server(self):
        """
        For data_master:
            - stops BT server process, releases network resources.
        """
        if self.data_master:
            if self.data_server is not None and self.data_server.is_alive():
                # In case server is running and is ok:
                self.data_socket.send_pyobj({'ctrl': '_stop'})
                self.data_server_response = self.data_socket.recv_pyobj()

            else:
                self.data_server.terminate()
                self.data_server.join()
                self.data_server_response = 'Data_server process terminated.'

            self.log.info('{} Exit code: {}'.format(self.data_server_response,
                                                    self.data_server.exitcode))

        #if self.data_context:
        #    self.data_context.destroy()
        #    self.data_socket = None

    def _restart_data_server(self):
        """
        Restarts data_server.
        """
        self._stop_data_server()
        self._start_data_server()

    def _get_dataset_info(self):
        """
        Retrieves dataset descriptive statistic.
        """
        self.data_socket.send_pyobj({'ctrl': '_get_info'})
        self.data_server_response = self.data_socket.recv_pyobj()

        return self.data_server_response['dataset_stat'],\
               self.data_server_response['dataset_columns'],\
               self.data_server_response['pid']

    def reset_data(self, **kwargs):
        """
        Resets data provider class used, whatever it means for that class. Gets data_server ready to provide data.
        Supposed to be called before first env.reset().

        Note:
            when invoked, forces running episode to terminate.

        Args:
            **kwargs:   data provider class .reset() method specific.
        """
        if self.closed:
            self._start_server()
            if self.data_master:
                self._start_data_server()
            self.closed = False

        else:
            _ = self._force_control_mode()

        if self.data_master:
            if self.data_server is None or not self.data_server.is_alive():
                self._restart_data_server()

            self.data_server_response = self._comm_with_timeout(
                socket=self.data_socket,
                message={
                    'ctrl': '_reset_data',
                    'kwargs': kwargs
                })
            if self.data_server_response['status'] == 'ok':
                self.log.debug(
                    'Dataset seems ready with response: <{}>'.format(
                        self.data_server_response['message']))

            else:
                msg = 'Data_server unreachable with status: <{}>.'. \
                    format(self.data_server_response['status'])
                self.log.error(msg)
                raise SystemExit(msg)

        else:
            pass
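
A hypothetical end-to-end usage sketch of the wrapper above (the CSV path and kwargs are placeholders, not project defaults); after reset_data()/reset() it behaves like a regular Gym environment:

env = BTgymEnv(filename='./data/EURUSD_1min_sample.csv', verbose=1)  # placeholder data file
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # random agent, for illustration only
    obs, reward, done, info = env.step(action)
env.close()
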
Example #25
class DevStrat_4_9(DevStrat_4_7):
    """
    4_7 + Uses simple SMA market state features.
    """
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20

    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    gamma = 1.0  # fi_gamma, should be MDP gamma decay

    reward_scale = 1  # reward multiplicator, touchy!

    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            spaces.Box(low=-100,
                       high=100,
                       shape=(time_dim, 1, 8),
                       dtype=np.float32),
            'internal':
            spaces.Box(low=-2, high=2, shape=(1, 1, 5), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=1.0,
        state_ext_scale=1e4,  # EURUSD
        state_int_scale=1.0,  # not used
        metadata={},
    )

    def set_datalines(self):
        self.data.sma_4 = btind.SimpleMovingAverage(self.datas[0], period=4)
        self.data.sma_8 = btind.SimpleMovingAverage(self.datas[0], period=8)
        self.data.sma_16 = btind.SimpleMovingAverage(self.datas[0], period=16)
        self.data.sma_32 = btind.SimpleMovingAverage(self.datas[0], period=32)
        self.data.sma_64 = btind.SimpleMovingAverage(self.datas[0], period=64)
        self.data.sma_128 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=128)
        self.data.sma_256 = btind.SimpleMovingAverage(self.datas[0],
                                                      period=256)

        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0],
                                                      period=(256 +
                                                              self.time_dim))
        self.data.dim_sma.plotinfo.plot = False

    def get_external_state(self):

        x = np.stack([
            np.frombuffer(self.data.open.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_4.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_8.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_16.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_32.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_64.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_128.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_256.get(size=self.time_dim)),
        ],
                     axis=-1)
        # Gradient along features axis:
        x = np.gradient(x, axis=1) * self.p.state_ext_scale

        # Log-scale:
        x = log_transform(x)
        return x[:, None, :]
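
A hypothetical wiring sketch (file path and kwarg values are placeholders): a strategy class such as DevStrat_4_9 is passed via the strategy kwarg, and strategy-related kwargs are forwarded to engine.addstrategy() as described in the BTgymEnv docstring above:

env = BTgymEnv(
    filename='./data/EURUSD_1min_sample.csv',   # placeholder CSV path
    strategy=DevStrat_4_9,
    skip_frame=10,
    drawdown_call=5,
    verbose=1,
)
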
Example #26
    def __init__(self, *args, **kwargs):
        """
        Environment kwargs applying logic::

            if <engine> kwarg is given:
                do not use default engine and strategy parameters;
                ignore <strategy> kwarg and all strategy and engine-related kwargs.

            else (no <engine>):
                use default engine parameters;
                if any engine-related kwarg is given:
                    override corresponding default parameter;

                if <strategy> is given:
                    do not use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

                else (no <strategy>):
                    use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

            if <dataset> kwarg is given:
                do not use default dataset parameters;
                ignore dataset related kwargs;

            else (no <dataset>):
                use default dataset parameters;
                    if  any dataset related kwarg is given:
                        override corresponding dataset parameter;

            If any <other> kwarg is given:
                override corresponding default parameter.
        """

        #print("start backtrader")

        # Parameters and default values:
        self.params = dict(

            # Backtrader engine mandatory parameters:
            engine=dict(
                start_cash=10.0,  # initial trading capital.
                broker_commission=
                0.001,  # trade execution commission, default is 0.1% of operation value.
                fixed_stake=10,  # single trade stake is fixed type by def.
            ),
            # Dataset mandatory parameters:
            dataset=dict(filename=None, ),
            strategy=dict(state_shape=dict(), ),
            render=dict(),
        )
        p2 = dict(
            # Strategy related parameters:
            # Observation state shape is dictionary of Gym spaces,
            # at least should contain `raw_state` field.
            # By convention first dimension of every Gym Box space is time embedding one;
            # one can define any shape; should match env.observation_space.shape.
            # observation space state min/max values,
            # For `raw_state`, absolute min/max values from BTgymDataset will be used.
            state_shape=dict(raw_state=spaces.Box(
                shape=(10, 4),
                low=-100,
                high=100,
            )),
            drawdown_call=
            None,  # episode maximum drawdown threshold, default is 90% of initial value.
            portfolio_actions=None,
            # agent actions,
            # should consist with BTgymStrategy order execution logic;
            # defaults are: 0 - 'do nothing', 1 - 'buy', 2 - 'sell', 3 - 'close position'.
            skip_frame=None,
            # Number of environment steps to skip before returning next response,
            # e.g. if set to 10 -- agent will interact with environment every 10th episode step;
            # Every other step agent's action is assumed to be 'hold'.
            # Note: INFO part of environment response is a list of all skipped frame's info's,
            #       i.e. [info[-9], info[-8], ..., info[0]].
        )

        # Update self attributes, remove used kwargs:
        for key in dir(self):
            if key in kwargs.keys():
                setattr(self, key, kwargs.pop(key))

        self.metadata = {'render.modes': self.render_modes}

        # Verbosity control:
        if True:  #self.log is None:
            self.log = logging.getLogger('Env')
            log_levels = [
                (0, 'WARNING'),
                (1, 'INFO'),
                (2, 'DEBUG'),
            ]
            for key, level in log_levels:
                if key == self.verbose:
                    self.log.setLevel(level)

        # Network parameters:
        self.network_address += str(self.port)
        self.data_network_address += str(self.data_port)

        # Set server rendering:
        if self.render_enabled:
            self.renderer = BTgymRendering(self.metadata['render.modes'],
                                           **kwargs)

        else:
            self.renderer = BTgymNullRendering()
            self.log.info(
                'Rendering disabled. Call to render() will return null-plug image.'
            )

        # Append logging:
        self.renderer.log = self.log

        # Update params -1: pull from renderer, remove used kwargs:
        self.params['render'].update(self.renderer.params)
        for key in self.params['render'].keys():
            if key in kwargs.keys():
                _ = kwargs.pop(key)

        if self.data_master:
            # DATASET preparation, only data_master executes this:
            #
            if self.dataset is not None:
                # If BTgymDataset instance has been passed:
                # do nothing.
                msg = 'Custom Dataset class used.'

            else:
                # If no BTgymDataset has been passed,
                # Make default dataset with given CSV file:
                if not os.path.isfile(
                        str(self.params['dataset']['filename'])):
                    raise FileNotFoundError(
                        'Dataset source data file not specified/not found')

                # Use kwargs to instantiate dataset:
                self.dataset = BTgymDataset(**kwargs)
                msg = 'Base Dataset class used.'

            # Append logging:
            self.dataset.log = self.log

            # Update params -2: pull from dataset, remove used kwargs:
            self.params['dataset'].update(self.dataset.params)
            for key in self.params['dataset'].keys():
                if key in kwargs.keys():
                    _ = kwargs.pop(key)

            self.log.info(msg)

        # Connect/Start data server (and get dataset statistic):
        self.log.info('Connecting data_server...')
        self._start_data_server()
        self.log.info('...done.')
        # ENGINE preparation:

        # Update params -3: pull engine-related kwargs, remove used:
        for key in self.params['engine'].keys():
            if key in kwargs.keys():
                self.params['engine'][key] = kwargs.pop(key)

        if self.engine is not None:
            # If full-blown bt.Cerebro() subclass has been passed:
            # Update info:
            msg = 'Custom Cerebro class used.'
            self.strategy = msg
            for key in self.params['engine'].keys():
                self.params['engine'][key] = msg

        # Note: either way, bt.observers.DrawDown observer [and logger] will be added to any BTgymStrategy instance
        # by BTgymServer process at runtime.

        else:
            # Default configuration for Backtrader computational engine (Cerebro),
            # if no bt.Cerebro() custom subclass has been passed,
            # get base class Cerebro(), using kwargs on top of defaults:
            self.engine = bt.Cerebro()
            msg = 'Base Cerebro class used.'

            # First, set STRATEGY configuration:
            if self.strategy is not None:
                # If custom strategy has been passed:
                msg2 = 'Custom Strategy class used.'

            else:
                # Base class strategy :
                self.strategy = BTgymBaseStrategy
                msg2 = 'Base Strategy class used.'

            # Add, using kwargs on top of defaults:
            strat_idx = self.engine.addstrategy(self.strategy, **kwargs)

            msg += ' ' + msg2

            # Second, set Cerebro-level configuration:
            self.engine.broker.setcash(self.params['engine']['start_cash'])
            self.engine.broker.setcommission(
                self.params['engine']['broker_commission'])
            self.engine.addsizer(bt.sizers.SizerFix,
                                 stake=self.params['engine']['fixed_stake'])

        self.log.info(msg)

        # Define observation space shape, minimum / maximum values and agent action space.
        # Retrieve values from configured engine or...

        # ...Update params -4:
        # Pull strategy defaults to environment params dict :
        for t_key, t_value in self.engine.strats[0][0][0].params._gettuple():
            self.params['strategy'][t_key] = t_value

        # Update it with values from strategy 'passed-to params':
        for key, value in self.engine.strats[0][0][2].items():
            self.params['strategy'][key] = value

        # ... Push it all back (don't ask):
        for key, value in self.params['strategy'].items():
            self.engine.strats[0][0][2][key] = value

        # For 'raw_state' min/max values,
        # the only way is to infer from raw Dataset price values (we already got those from data_server):
        if 'raw_state' in self.params['strategy']['state_shape'].keys():
            # Exclude 'volume' from columns we count:
            self.dataset_columns.remove('volume')

            #print(self.params['strategy'])
            #print('self.engine.strats[0][0][2]:', self.engine.strats[0][0][2])
            #print('self.engine.strats[0][0][0].params:', self.engine.strats[0][0][0].params._gettuple())

            # Override with absolute price min and max values:
            self.params['strategy']['state_shape']['raw_state'].low =\
                self.engine.strats[0][0][2]['state_shape']['raw_state'].low =\
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) +\
                self.dataset_stat.loc['min', self.dataset_columns].min()

            self.params['strategy']['state_shape']['raw_state'].high = \
                self.engine.strats[0][0][2]['state_shape']['raw_state'].high = \
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) + \
                self.dataset_stat.loc['max', self.dataset_columns].max()

            self.log.info(
                'Inferring `raw_state` high/low values from dataset: {:.6f} / {:.6f}.'
                .format(
                    self.dataset_stat.loc['min', self.dataset_columns].min(),
                    self.dataset_stat.loc['max', self.dataset_columns].max()))

        # Set observation space shape from engine/strategy parameters:
        self.observation_space = DictSpace(
            self.params['strategy']['state_shape'])

        self.log.debug('Obs. shape: {}'.format(self.observation_space.spaces))
        #self.log.debug('Obs. min:\n{}\nmax:\n{}'.format(self.observation_space.low, self.observation_space.high))

        # Set action space and corresponding server messages:
        self.action_space = spaces.Discrete(
            len(self.params['strategy']['portfolio_actions']))
        self.server_actions = self.params['strategy']['portfolio_actions']

        # Finally:
        self.server_response = None
        self.env_response = None

        # If instance is data_master - it may or may not want to launch its own BTgymServer (can do it later via reset);
        # else it always needs to launch it:
        #if not self.data_master:
        self._start_server()
        self.closed = False

        self.log.info('Environment is ready.')
Example #27
0
class DevStrat_4_11_1(DevStrat_4_11):
    # Time embedding period:
    time_dim = 30  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params
    # Number of environment steps to skip before returning next response,
    # e.g. if set to 10 -- agent will interact with environment every 10th step;
    # every other step agent action is assumed to be 'hold':
    skip_frame = 10
    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = 20
    # Possible agent actions:
    portfolio_actions = ('hold', 'buy', 'sell', 'close')
    gamma = 0.99  # fi_gamma, should be MDP gamma decay
    reward_scale = 1  # reward multiplicator
    state_ext_scale = np.linspace(3e3, 1e3, num=5)
    params = dict(
        # Note: fake `Width` dimension to use 2d conv etc.:
        state_shape={
            'external':
            DictSpace({
                'diff':
                spaces.Box(low=-100,
                           high=100,
                           shape=(time_dim, 1, 5),
                           dtype=np.float32),
                'avg':
                spaces.Box(low=-100,
                           high=100,
                           shape=(time_dim, 1, 5),
                           dtype=np.float32),
            }),
            'internal':
            spaces.Box(low=-2,
                       high=2,
                       shape=(avg_period, 1, 6),
                       dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        leverage=1.0,
        drawdown_call=5,
        target_call=19,
        portfolio_actions=portfolio_actions,
        initial_action=None,
        initial_portfolio_action=None,
        skip_frame=skip_frame,
        gamma=gamma,
        reward_scale=1.0,
        state_ext_scale=state_ext_scale,  # EURUSD
        state_int_scale=1.0,
        metadata={},
    )

    def get_external_state(self):
        x_sma = np.stack([
            np.frombuffer(self.data.sma_16.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_32.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_64.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_128.get(size=self.time_dim)),
            np.frombuffer(self.data.sma_256.get(size=self.time_dim)),
        ],
                         axis=-1)
        # Gradient along features axis:
        diff = np.gradient(x_sma, axis=-1) * self.p.state_ext_scale
        diff = tanh(diff)
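        # Gradient along time (embedding) axis: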
        avg = np.gradient(x_sma, axis=0) * self.p.state_ext_scale
        avg = tanh(avg)

        return {'avg': avg[:, None, :], 'diff': diff[:, None, :]}
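For reference, a minimal standalone sketch of the same external-state shaping on synthetic data; the fake SMA stack and `time_dim`/`state_ext_scale` values are assumptions standing in for the strategy's sma_16..sma_256 buffers, and the resulting shapes match the `external` Box spaces declared above:

import numpy as np

time_dim = 30
state_ext_scale = np.linspace(3e3, 1e3, num=5)

# Synthetic stand-in for the stacked SMA buffers, shape (time_dim, 5):
x_sma = 1.10 + np.cumsum(np.random.randn(time_dim, 5) * 1e-4, axis=0)

diff = np.tanh(np.gradient(x_sma, axis=-1) * state_ext_scale)  # gradient across SMA features
avg = np.tanh(np.gradient(x_sma, axis=0) * state_ext_scale)    # gradient along time embedding

external = {'avg': avg[:, None, :], 'diff': diff[:, None, :]}
assert external['diff'].shape == (time_dim, 1, 5)  # matches the declared Box shapes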
Example #28
0
    def __init__(self, **kwargs):
        """
        Keyword Args:

            filename=None (str, list):                      csv data file.
            **datafeed_args (any):                          any datafeed-related args, passed through to
                                                            default btgym.datafeed class.
            dataset=None (btgym.datafeed):                  BTgymDataDomain instance,
                                                            overrides `filename` or any other datafeed-related args.
            strategy=None (btgym.strategy):                 strategy to be used by `engine`, any subclass of
                                                            btgym.strategy.base.BTgymBaseStrategy
            engine=None (bt.Cerebro):                       environment simulation engine, any bt.Cerebro subclass,
                                                            overrides `strategy` arg.
            network_address=`tcp://127.0.0.1:` (str):       BTGym_server address.
            port=5500 (int):                                network port to use for server - API_shell communication.
            data_master=True (bool):                        let this environment control the data_server;
            data_network_address=`tcp://127.0.0.1:` (str):  data_server address.
            data_port=4999 (int):                           network port to use for server -- data_server communication.
            connect_timeout=60 (int):                       server connection timeout in seconds.
            render_enabled=True (bool):                     enable rendering for this environment;
            render_modes=['human', 'episode'] (list):       `episode` - plotted episode results;
                                                            `human` - raw_state observation.
            **render_args (any):                            any render-related args, passed through to renderer class.
            verbose=0 (int):                                verbosity mode, {0 - WARNING, 1 - INFO, 2 - DEBUG}
            log_level=None (int):                           logbook level {DEBUG=10, INFO=11, NOTICE=12, WARNING=13},
                                                            overrides `verbose` arg;
            log=None (logbook.Logger):                      external logbook logger,
                                                            overrides `log_level` and `verbose` args.
            task=0 (int):                                   environment id

        Environment kwargs applying logic::

            if <engine> kwarg is given:
                do not use default engine and strategy parameters;
                ignore <strategy> kwarg and all strategy and engine-related kwargs.

            else (no <engine>):
                use default engine parameters;
                if any engine-related kwarg is given:
                    override corresponding default parameter;

                if <strategy> is given:
                    do not use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

                else (no <strategy>):
                    use default strategy parameters;
                    if any strategy related kwarg is given:
                        override corresponding strategy parameter;

            if <dataset> kwarg is given:
                do not use default dataset parameters;
                ignore dataset related kwargs;

            else (no <dataset>):
                use default dataset parameters;
                    if  any dataset related kwarg is given:
                        override corresponding dataset parameter;

            If any <other> kwarg is given:
                override corresponding default parameter.
        """
        # Parameters and default values:
        self.params = dict(

            # Backtrader engine mandatory parameters:
            engine=dict(
                start_cash=10.0,  # initial trading capital.
                broker_commission=
                0.001,  # trade execution commission, default is 0.1% of operation value.
                fixed_stake=10,  # single trade stake is fixed type by def.
            ),
            # Dataset mandatory parameters:
            dataset=dict(filename=None, ),
            strategy=dict(state_shape=dict(), ),
            render=dict(),
        )
        p2 = dict(  # IS HERE FOR REFERENCE ONLY
            # Strategy related parameters:
            # Observation state shape is dictionary of Gym spaces,
            # at least should contain `raw_state` field.
            # By convention first dimension of every Gym Box space is time embedding one;
            # one can define any shape; should match env.observation_space.shape.
            # observation space state min/max values,
            # For `raw_state' - absolute min/max values from BTgymDataset will be used.
            state_shape=dict(raw_state=spaces.Box(
                shape=(10, 4), low=-100, high=100, dtype=np.float32)),
            drawdown_call=
            None,  # episode maximum drawdown threshold, default is 90% of initial value.
            portfolio_actions=None,
            # agent actions,
            # should be consistent with BTgymStrategy order execution logic;
            # defaults are: 0 - 'do nothing', 1 - 'buy', 2 - 'sell', 3 - 'close position'.
            skip_frame=None,
            # Number of environment steps to skip before returning next response,
            # e.g. if set to 10 -- agent will interact with environment every 10th episode step;
            # Every other step agent's action is assumed to be 'hold'.
            # Note: INFO part of environment response is a list of all skipped frame's info's,
            #       i.e. [info[-9], info[-8], ..., info[0]].
        )
        # Update self attributes, remove used kwargs:
        for key in dir(self):
            if key in kwargs.keys():
                setattr(self, key, kwargs.pop(key))

        self.metadata = {'render.modes': self.render_modes}

        # Logging and verbosity control:
        if self.log is None:
            StreamHandler(sys.stdout).push_application()
            if self.log_level is None:
                log_levels = [(0, NOTICE), (1, INFO), (2, DEBUG)]
                self.log_level = WARNING
                for key, value in log_levels:
                    if key == self.verbose:
                        self.log_level = value
            self.log = Logger('BTgymAPIshell_{}'.format(self.task),
                              level=self.log_level)

        # Network parameters:
        self.network_address += str(self.port)
        self.data_network_address += str(self.data_port)

        # Set server rendering:
        if self.render_enabled:
            self.renderer = BTgymRendering(self.metadata['render.modes'],
                                           log_level=self.log_level,
                                           **kwargs)

        else:
            self.renderer = BTgymNullRendering()
            self.log.info(
                'Rendering disabled. Call to render() will return null-plug image.'
            )

        # Append logging:
        self.renderer.log = self.log

        # Update params -1: pull from renderer, remove used kwargs:
        self.params['render'].update(self.renderer.params)
        for key in self.params['render'].keys():
            if key in kwargs.keys():
                _ = kwargs.pop(key)

        if self.data_master:
            # DATASET preparation, only data_master executes this:
            #
            if self.dataset is not None:
                # If BTgymDataset instance has been passed:
                # do nothing.
                msg = 'Custom Dataset class used.'

            else:
                # If no BTgymDataset has been passed,
                # Make default dataset with given CSV file:
                if not os.path.isfile(
                        str(self.params['dataset']['filename'])):
                    raise FileNotFoundError(
                        'Dataset source data file not specified/not found')

                # Use kwargs to instantiate dataset:
                self.dataset = BTgymDataset(**kwargs)
                msg = 'Base Dataset class used.'

            # Append logging:
            self.dataset.set_logger(self.log_level, self.task)

            # Update params -2: pull from dataset, remove used kwargs:
            self.params['dataset'].update(self.dataset.params)
            for key in self.params['dataset'].keys():
                if key in kwargs.keys():
                    _ = kwargs.pop(key)

            self.log.info(msg)

        # Connect/Start data server (and get dataset statistic):
        self.log.info('Connecting data_server...')
        self._start_data_server()
        self.log.info('...done.')
        # ENGINE preparation:

        # Update params -3: pull engine-related kwargs, remove used:
        for key in self.params['engine'].keys():
            if key in kwargs.keys():
                self.params['engine'][key] = kwargs.pop(key)

        if self.engine is not None:
            # If full-blown bt.Cerebro() subclass has been passed:
            # Update info:
            msg = 'Custom Cerebro class used.'
            self.strategy = msg
            for key in self.params['engine'].keys():
                self.params['engine'][key] = msg

        # Note: either way, bt.observers.DrawDown observer [and logger] will be added to any BTgymStrategy instance
        # by BTgymServer process at runtime.

        else:
            # Default configuration for Backtrader computational engine (Cerebro),
            # if no bt.Cerebro() custom subclass has been passed,
            # get base class Cerebro(), using kwargs on top of defaults:
            self.engine = bt.Cerebro()
            msg = 'Base Cerebro class used.'

            # First, set STRATEGY configuration:
            if self.strategy is not None:
                # If custom strategy has been passed:
                msg2 = 'Custom Strategy class used.'

            else:
                # Base class strategy :
                self.strategy = BTgymBaseStrategy
                msg2 = 'Base Strategy class used.'

            # Add, using kwargs on top of defaults:
            #self.log.debug('kwargs for strategy: {}'.format(kwargs))
            strat_idx = self.engine.addstrategy(self.strategy, **kwargs)

            msg += ' ' + msg2

            # Second, set Cerebro-level configuration:
            self.engine.broker.setcash(self.params['engine']['start_cash'])
            self.engine.broker.setcommission(
                self.params['engine']['broker_commission'])
            self.engine.addsizer(bt.sizers.SizerFix,
                                 stake=self.params['engine']['fixed_stake'])

        self.log.info(msg)

        # Define observation space shape, minimum / maximum values and agent action space.
        # Retrieve values from configured engine or...

        # ...Update params -4:
        # Pull strategy defaults to environment params dict :
        for t_key, t_value in self.engine.strats[0][0][0].params._gettuple():
            self.params['strategy'][t_key] = t_value

        # Update it with values from strategy 'passed-to params':
        for key, value in self.engine.strats[0][0][2].items():
            self.params['strategy'][key] = value

        # ... Push it all back (don't ask):
        for key, value in self.params['strategy'].items():
            self.engine.strats[0][0][2][key] = value

        # For 'raw_state' min/max values,
        # the only way is to infer from raw Dataset price values (we already got those from data_server):
        if 'raw_state' in self.params['strategy']['state_shape'].keys():
            # Exclude 'volume' from columns we count:
            self.dataset_columns.remove('volume')

            #print(self.params['strategy'])
            #print('self.engine.strats[0][0][2]:', self.engine.strats[0][0][2])
            #print('self.engine.strats[0][0][0].params:', self.engine.strats[0][0][0].params._gettuple())

            # Override with absolute price min and max values:
            self.params['strategy']['state_shape']['raw_state'].low =\
                self.engine.strats[0][0][2]['state_shape']['raw_state'].low =\
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) +\
                self.dataset_stat.loc['min', self.dataset_columns].min()

            self.params['strategy']['state_shape']['raw_state'].high = \
                self.engine.strats[0][0][2]['state_shape']['raw_state'].high = \
                np.zeros(self.params['strategy']['state_shape']['raw_state'].shape) + \
                self.dataset_stat.loc['max', self.dataset_columns].max()

            self.log.info(
                'Inferring `raw_state` high/low values from dataset: {:.6f} / {:.6f}.'
                .format(
                    self.dataset_stat.loc['min', self.dataset_columns].min(),
                    self.dataset_stat.loc['max', self.dataset_columns].max()))

        # Set observation space shape from engine/strategy parameters:
        self.observation_space = DictSpace(
            self.params['strategy']['state_shape'])

        self.log.debug('Obs. shape: {}'.format(self.observation_space.spaces))
        #self.log.debug('Obs. min:\n{}\nmax:\n{}'.format(self.observation_space.low, self.observation_space.high))

        # Set action space and corresponding server messages:
        self.action_space = spaces.Discrete(
            len(self.params['strategy']['portfolio_actions']))
        self.server_actions = self.params['strategy']['portfolio_actions']

        # Finally:
        self.server_response = None
        self.env_response = None

        #if not self.data_master:
        self._start_server()
        self.closed = False

        self.log.info('Environment is ready.')
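A hedged usage sketch of the constructor above, illustrating the kwargs-applying logic described in its docstring; the `BTgymEnv` class name, import path and CSV filename are assumptions/placeholders rather than part of this example:

import numpy as np
from gym import spaces
from btgym import BTgymEnv  # assumed entry point exposing the constructor shown above

env = BTgymEnv(
    filename='./data/some_1min_ohlc.csv',  # placeholder path; dataset-related kwarg -> default BTgymDataset
    state_shape={'raw_state': spaces.Box(low=-100, high=100, shape=(30, 4), dtype=np.float32)},
    start_cash=100.0,     # engine-related kwarg, overrides the 10.0 default above
    drawdown_call=50,     # strategy-related kwarg
    port=5555,
    verbose=1,
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()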
Example #29
0
File: base.py Project: mysl/btgym
class BaseStrategy6(bt.Strategy):
    """
    Added for gen.6:
        traded asset volatility-based rescaling for all broker statistics and, consequently, reward fn
        self.p.norm_alpha - tracking smoothing decay parameter added
        self.p.target_call  - upper limit arg. is removed
        TODO: auto sizer inference, co-integration coeff. inference

    Controls Environment inner dynamics and backtesting logic. Provides gym'my (State, Action, Reward, Done, Info) data.
    Any State, Reward and Info computation logic can be implemented by subclassing BTgymStrategy and overriding
    get_[mode]_state(), get_reward(), get_info(), is_done() and set_datalines() methods.
    One can always go deeper and override __init__ () and next() methods for desired
    server cerebro engine behaviour, including order execution logic etc.

    Note:
        - base class supports single asset iteration via default data_line named 'base_asset'; see derived classes
          for multi-asset support.
        - bt.observers.DrawDown observer will be automatically added to BTgymStrategy instance at runtime.
        - Since it is bt.Strategy subclass, refer to https://www.backtrader.com/docu/strategy.html for more information.
    """
    # Time embedding period:
    time_dim = 32  # NOTE: changed this --> change Policy  UNREAL for aux. pix control task upsampling params

    # Number of timesteps reward estimation statistics are averaged over, should be:
    # skip_frame_period <= avg_period <= time_embedding_period:
    avg_period = int(time_dim / 2)

    # Possible agent actions;  Note: place 'hold' first! :
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = ()
    num_features = len(features_parameters)

    params = dict(
        # Observation state shape is dictionary of Gym spaces,
        # at least should contain `raw_state` field.
        # By convention first dimension of every Gym Box space is time embedding one;
        # one can define any shape; should match env.observation_space.shape.
        # observation space state min/max values,
        # For `raw_state' (default) - absolute min/max values from BTgymDataset will be used.
        state_shape={
            'raw':
            spaces.Box(
                shape=(time_dim, 4),
                low=0,  # will get overridden.
                high=0,
                dtype=np.float32,
            ),
            'internal':
            spaces.Box(low=-100,
                       high=100,
                       shape=(avg_period, 1, 5),
                       dtype=np.float32),
            'stat':
            spaces.Box(low=-100, high=100, shape=(2, 1), dtype=np.float32),
            'metadata':
            DictSpace({
                'type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type':
                spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row':
                spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp':
                spaces.Box(shape=(),
                           low=0,
                           high=np.finfo(np.float64).max,
                           dtype=np.float64),
            })
        },
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,  # fi_gamma, should match MDP gamma decay
        reward_scale=1.0,  # reward multiplicator
        norm_alpha=0.001,  # renormalisation tracking decay in (0, 1]
        drawdown_call=
        10,  # finish episode when hitting drawdown threshold, in percent of initial cash.
        dataset_stat=
        None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Got updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=
        avg_period,  # number of time steps reward estimation statistics are tracked over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=
        1,  # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        """
        Keyword Args:
            params (dict):          parameters dictionary, see Note below.

            Notes:
                Due to backtrader convention, any strategy arguments should be defined inside `params` dictionary
                or passed as kwargs to bt.Cerebro() class via .addstrategy() method. Parameter dictionary
                should contain at least these keys::

                    state_shape:        Observation state shape is dictionary of Gym spaces, by convention
                                        first dimension of every Gym Box space is time embedding one;
                    cash_name:          str, name for cash asset
                    asset_names:        iterable of str, names for assets
                    start_cash:         float, broker starting cash
                    commission:         float, broker commission value, .01 stands for 1%
                    leverage:           float, broker leverage
                    slippage:           float, broker execution slippage
                    order_size:         dict of fixed order stakes (floats); keys should match assets names.
                    drawdown_call:      finish episode when hitting this drawdown threshold, in percent.
                    portfolio_actions:  possible agent actions.
                    skip_frame:         number of environment steps to skip before returning next response,
                                        e.g. if set to 10 -- agent will interact with environment every 10th step;
                                        every other step agent action is assumed to be 'hold'.

                Default values are::

                    state_shape=dict(raw_state=spaces.Box(shape=(4, 4), low=0, high=0,))
                    cash_name='default_cash'
                    asset_names=['default_asset']
                    start_cash=None
                    commission=None
                    slippage=None,
                    leverage=1.0
                    drawdown_call=10
                    dataset_stat=None
                    episode_stat=None
                    portfolio_actions=('hold', 'buy', 'sell', 'close')
                    skip_frame=1
                    order_size=None
        """
        # Inherit logger from cerebro:
        self.log = self.env._log

        assert self.p.avg_period + 2 < self.p.time_dim, 'Expected avg_period + 2 < time_dim'

        self.skip_frame = self.p.skip_frame

        self.iteration = 0
        self.pre_iteration = 0
        self.env_iteration = 0
        self.inner_embedding = 1
        self.is_done = False
        self.is_done_enabled = False
        self.steps_till_is_done = 2  # extra steps to make when episode terminal conditions are met
        self.action = self.p.initial_portfolio_action
        self.action_to_repeat = self.p.initial_portfolio_action
        self.action_repeated = 0
        self.num_action_repeats = None
        self.reward = 0
        self.order = None
        self.order_failed = 0
        self.broker_message = '_'
        self.final_message = '_'
        self.raw_state = None
        self.time_stamp = 0

        # Prepare broker:
        if self.p.start_cash is not None:
            self.env.broker.setcash(self.p.start_cash)

        if self.p.commission is not None:
            self.env.broker.setcommission(commission=self.p.commission,
                                          leverage=self.p.leverage)

        if self.p.slippage is not None:
            # Bid/ask workaround: set overkill 10% slippage + slip_out=False
            # ensuring we always buy at current 'high'~'ask' and sell at 'low'~'bid':
            self.env.broker.set_slippage_perc(self.p.slippage,
                                              slip_open=True,
                                              slip_match=True,
                                              slip_out=False)

        # self.target_value = self.env.broker.startingcash * (1 + self.p.target_call / 100)

        # Try to define stake, if no self.p.order_size dict has been set:
        if self.p.order_size is None:
            # If no order size has been set for every data_line,
            # try to infer stake size from sizer set by bt.Cerebro.addsizer() method:
            try:
                assert len(list(self.env.sizers.values())) == 1
                env_sizer_params = list(self.env.sizers.values())[0][
                    -1]  # pull dict of outer set sizer params
                assert 'stake' in env_sizer_params.keys()

            except (AssertionError, KeyError) as e:
                msg = 'Order stake is set neither via strategy.param.order_size nor via the bt.Cerebro.addsizer method.'
                self.log.error(msg)
                raise ValueError(msg)

            self.p.order_size = {
                name: env_sizer_params['stake']
                for name in self.p.asset_names
            }

        elif isinstance(self.p.order_size, int) or isinstance(
                self.p.order_size, float):
            unimodal_stake = {
                name: self.p.order_size
                for name in self.getdatanames()
            }
            self.p.order_size = unimodal_stake

        # Current effective order sizes:
        self.current_order_sizes = None

        # Current stat normalisation:
        self.normalizer = 1.0

        # self.log.warning('asset names: {}'.format(self.p.asset_names))
        # self.log.warning('data names: {}'.format(self.getdatanames()))

        self.trade_just_closed = False
        self.trade_result = 0

        self.unrealized_pnl = None
        self.norm_broker_value = None
        self.realized_pnl = None

        self.current_pos_duration = 0
        self.current_pos_min_value = 0
        self.current_pos_max_value = 0

        self.realized_broker_value = self.env.broker.startingcash
        self.episode_result = 0  # not used

        # Service SMA to ensure correct first feature values:
        self.data.dim_sma = btind.SimpleMovingAverage(self.datas[0],
                                                      period=self.p.time_dim)
        self.data.dim_sma.plotinfo.plot = False

        # self.log.warning('self.p.dir: {}'.format(dir(self.params)))

        # Episode-wide metadata:
        self.metadata = {
            'type': np.asarray(self.p.metadata['type']),
            'trial_num': np.asarray(self.p.metadata['parent_sample_num']),
            'trial_type': np.asarray(self.p.metadata['parent_sample_type']),
            'sample_num': np.asarray(self.p.metadata['sample_num']),
            'first_row': np.asarray(self.p.metadata['first_row']),
            'timestamp': np.asarray(self.time_stamp, dtype=np.float64)
        }
        self.state = {'raw': None, 'metadata': None}

        # If it is train or test episode?
        # default logic: true iff. it is test episode from target domain:
        self.is_test = self.metadata['type'] and self.metadata['trial_type']

        # This flag shows to the outer world if this episode can broadcast world-state information, e.g. move global
        # time forward (see: btgym.server._BTgymAnalyzer.next() method);
        self.can_broadcast = self.is_test

        self.log.debug('strategy.metadata: {}'.format(self.metadata))
        self.log.debug('is_test: {}'.format(self.is_test))

        # Broker data lines of interest (used for estimating the inner state of the agent):
        self.broker_datalines = [
            'cash',
            'value',
            'exposure',
            'drawdown',
            'pos_duration',
            'realized_pnl',
            'unrealized_pnl',
            'min_unrealized_pnl',
            'max_unrealized_pnl',
            'total_unrealized_pnl',
        ]
        # Define flat collection dictionary looking up for methods for estimating broker statistics,
        # one method for one mode, should be named .get_broker_[mode_name]():
        self.collection_get_broker_stat_methods = {}
        for line in self.broker_datalines:
            try:
                self.collection_get_broker_stat_methods[line] = getattr(
                    self, 'get_broker_{}'.format(line))

            except AttributeError:
                raise NotImplementedError(
                    'Callable get_broker_{}() not found'.format(line))

        # Broker and account related sliding statistics accumulators:
        self.broker_stat = {
            key: np.zeros(self.avg_period)
            for key in self.broker_datalines
        }

        # This data line will be used by default to
        # define normalisation bounds (can be overridden via .set_datalines()):
        self.stat_asset = self.data.open

        # Add custom data Lines if any [and possibly redefine stat_asset and order_size_normalizer]:
        self.set_datalines()

        # Normalisation statistics estimator (updated via update_broker_stat()):
        self.norm_stat_tracker = Zscore(1, alpha=self.p.norm_alpha)
        self.normalisation_state = NormalisationState(0, 0, .9, 1.1)

        # State exp. smoothing params:
        self.internal_state_discount = np.cumprod(
            np.tile(1 - 1 / self.p.avg_period, self.p.avg_period))[::-1]
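        # E.g. for avg_period=4 this yields weights ~[0.32, 0.42, 0.56, 0.75]:
        # monotonically increasing towards the most recent step, so older broker stat entries are discounted more.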
        self.external_state_discount = None  # not used

        # Define flat collection dictionary looking for methods for estimating observation state,
        # one method per one mode, should be named .get_[mode_name]_state():
        self.collection_get_state_methods = {}
        for key in self.p.state_shape.keys():
            try:
                self.collection_get_state_methods[key] = getattr(
                    self, 'get_{}_state'.format(key))

            except AttributeError:
                raise NotImplementedError(
                    'Callable get_{}_state() not found'.format(key))

        for data in self.datas:
            self.log.debug('data_name: {}'.format(data._name))

        self.log.debug('stake size: {}'.format(self.p.order_size))

        # Define how this strategy should handle actions: either as discrete or continuous:
        if self.p.portfolio_actions is None or set(
                self.p.portfolio_actions) == set():
            # No discrete actions provided, assume continuous:
            try:
                assert self.p.skip_frame > 1

            except AssertionError:
                msg = 'For continuous actions it is essential to set `skip_frame` parameter > 1, got: {}'.format(
                    self.p.skip_frame)
                self.log.error(msg)
                raise ValueError(msg)
            # Disable broker checking margin,
            # see: https://community.backtrader.com/topic/152/multi-asset-ranking-and-rebalancing/2?page=1
            self.env.broker.set_checksubmit(False)
            self.next_process_fn = self._next_target_percent
            # Repeat action 2 times:
            self.num_action_repeats = 2

        else:
            # Use discrete handling method otherwise:
            self.env.broker.set_checksubmit(True)
            self.next_process_fn = self._next_discrete
            # self.log.warning('DISCRETE')
            # Do not repeat action for discrete:
            self.num_action_repeats = 0

    def prenext(self):
        if self.pre_iteration + 2 > self.p.time_dim - self.avg_period:
            self.update_broker_stat()

        elif self.pre_iteration + 2 == self.p.time_dim - self.avg_period:
            _ = self.norm_stat_tracker.reset(
                np.asarray(self.stat_asset.get(
                    size=self.data.close.buflen()))[None, :])

        self.pre_iteration += 1

    def nextstart(self):
        self.inner_embedding = self.data.close.buflen()
        # self.log.warning('Inner time embedding: {}'.format(self.inner_embedding))
        # for k, v in self.broker_stat.items():
        #     self.log.warning('{}: {}'.format(k, len(v)))

    def next(self):
        """
        Default implementation for built-in backtrader method.
        Defines one step environment routine;
        Handles order execution logic according to action received.
        Note that orders can only be submitted for data_lines in action_space (assets).
        `self.action` attr. is updated by btgym.server._BTgymAnalyzer, and `None` actions
        are emitted while doing `skip_frame` loop.
        """
        self.update_broker_stat()

        if '_skip_this' in self.action.keys():
            # print('a_skip, b_message: ', self.broker_message)
            if self.action_repeated < self.num_action_repeats:
                self.next_process_fn(self.action_to_repeat)
                self.action_repeated += 1

        else:
            self.next_process_fn(self.action)
            self.action_repeated = 0
            self.action_to_repeat = self.action
            # print('a_process, b_message: ', self.broker_message)

    def notify_trade(self, trade):
        if trade.isclosed:
            # Set trade flags: True if trade has been closed just now and within last frame-skip period,
            # and store trade result:
            self.trade_just_closed = True
            # Note: `trade_just_closed` flag has to be reset manually after evaluating.
            self.trade_result += trade.pnlcomm

            # Store realized portfolio value:
            self.realized_broker_value = self.broker.get_value()
            # self.log.warning('notify_trade: trade_pnl: {}, cum_trade_result: {}, realized_value: {}'.format(
            #     trade.pnlcomm, self.trade_result, self.realized_broker_value)
            # )

    def update_broker_stat(self):
        """
        Updates all sliding broker statistics with latest-step values such as:
            - normalized broker value
            - normalized broker cash
            - normalized exposure (position size)
            - exp. scaled episode duration in steps, normalized wrt. max possible episode steps
            - normalized realized profit/loss for last closed trade (is zero if no pos. closures within last env. step)
            - normalized profit/loss for current opened trade (unrealized p/l);
        """
        # Update current account value:
        current_value = self.env.broker.get_value()

        # ...normalisation bounds:
        norm_state = self.get_normalisation()

        # ..current order sizes:

        # order_sizes = self.get_order_sizes()

        # ...individual positions for each instrument traded:
        positions = [self.env.broker.getposition(data) for data in self.datas]

        # ... total cash exposure:
        exposure = sum([abs(pos.size) for pos in positions])

        # ... tracking normalisation constant:

        self.normalizer = 1 / np.clip(
            (norm_state.up_interval - norm_state.low_interval), 1e-8, None)

        # print('norm_state: ', norm_state)
        # print('normalizer: ', normalizer)
        # print('self.current_order_sizes: ', self.current_order_sizes)

        for key, method in self.collection_get_broker_stat_methods.items():
            update = method(
                current_value=current_value,
                positions=positions,
                exposure=exposure,
                lower_bound=norm_state.low_interval,
                upper_bound=norm_state.up_interval,
                normalizer=self.normalizer,
            )
            # Update accumulator:
            self.broker_stat[key] = np.concatenate(
                [self.broker_stat[key][1:],
                 np.asarray([float(update)])])

        # Reset one-time flags:
        self.trade_just_closed = False
        self.trade_result = 0

    def get_normalisation(self):
        """
        Estimates current normalisation constants, updates `normalisation_state` attr.

        Returns:
            instance of NormalisationState tuple
        """
        # Update normalizer stat:
        stat_data = np.asarray(self.stat_asset.get(size=1))
        mean, var = self.norm_stat_tracker.update(stat_data[None, :])
        var = np.clip(var, 1e-8, None)

        # Use 99% N(stat_data_mean, stat_data_std) intervals as normalisation interval:
        intervals = stats.norm.interval(.99, mean, var**.5)
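        # For the .99 confidence level this is approximately mean +/- 2.576 * std.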
        self.normalisation_state = NormalisationState(
            mean=float(mean),
            variance=float(var),
            low_interval=intervals[0][0],
            up_interval=intervals[1][0])
        return self.normalisation_state

    def get_order_sizes(self):
        """
        Estimates current order sizes for assets in trade, sets attribute.

        Returns:
            array-like of floats
        """
        # Default implementation for fixed-size orders:
        self.current_order_sizes = np.fromiter(self.p.order_size.values(),
                                               dtype=float)
        return self.current_order_sizes

    def get_broker_value(self, current_value, normalizer, **kwargs):
        """

        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            broker value normalized w.r.t. start value.
        """
        return (
            current_value - self.env.broker.startingcash
        ) / self.env.broker.startingcash / self.p.leverage  #* normalizer

    def get_broker_cash(self, current_value, **kwargs):
        """
        Args:
            current_value:    float, current portfolio value

        Returns:
            broker cash normalized w.r.t. current value.
        """
        return self.env.broker.get_cash() / current_value

    def get_broker_exposure(self, exposure, normalizer, **kwargs):
        """
        Args:
            exposure:   float, current total position exposure

        Returns:
            exposure (position size) normalized w.r.t. single order size.
        """
        return exposure * normalizer  #/ self.current_order_sizes.mean()

    def get_broker_realized_pnl(self, normalizer, **kwargs):
        """

        Args:
            normalizer:     float, normalisation constant

        Returns:
            normalized realized profit/loss for last closed trade (is zero if no pos. closures within last env. step)
        """

        if self.trade_just_closed:
            pnl = self.trade_result * normalizer

        else:
            pnl = 0.0
        return pnl

    def get_broker_unrealized_pnl(self, current_value, normalizer, **kwargs):
        """

        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            normalized profit/loss for current opened trade
        """
        pnl = (current_value - self.realized_broker_value) * normalizer

        return pnl

    def get_broker_total_unrealized_pnl(self, current_value, normalizer,
                                        **kwargs):
        """
        REDUNDANT
        Args:
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant


        Returns:
            normalized profit/loss wrt. initial portfolio value
        """
        pnl = (current_value -
               self.env.broker.startingcash) / self.env.broker.startingcash

        return pnl

    def get_broker_drawdown(self, **kwargs):
        """

        Returns:
            current drawdown value
        """
        try:
            dd = self.stats.drawdown.drawdown[-1] / self.p.drawdown_call
        except IndexError:
            dd = 0.0
        return dd

    def get_broker_pos_duration(self, exposure, **kwargs):
        """

        Args:
            exposure:   float, current total positions exposure

        Returns:
            int, number of ticks current position is being held
        """
        if exposure == 0:
            self.current_pos_duration = 0
            # print('ZERO_POSITION\n')

        else:
            self.current_pos_duration += 1

        return self.current_pos_duration

    def get_broker_max_unrealized_pnl(self, current_value, exposure,
                                      normalizer, **kwargs):
        """

        Args:
            exposure:       float, current total positions exposure
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            best unrealised PnL achieved within current opened position

        """
        if exposure == 0:
            self.current_pos_max_value = current_value

        else:
            if self.current_pos_max_value < current_value:
                self.current_pos_max_value = current_value

        pnl = (self.current_pos_max_value -
               self.realized_broker_value) * normalizer

        return pnl

    def get_broker_min_unrealized_pnl(self, current_value, exposure,
                                      normalizer, **kwargs):
        """

        Args:
            exposure:       float, current total positions exposure
            current_value:  float, current portfolio value
            normalizer:     float, normalisation constant

        Returns:
            worst unrealised PnL achieved within current opened position
        """
        if exposure == 0:
            self.current_pos_min_value = current_value

        else:
            if self.current_pos_min_value > current_value:
                self.current_pos_min_value = current_value

        pnl = (self.current_pos_min_value -
               self.realized_broker_value) * normalizer

        return pnl

    def set_datalines(self):
        """
        Default datalines are: Open, Low, High, Close, Volume.
        Any other custom data lines, indicators, etc. should be explicitly defined by overriding this method.
        Invoked once by Strategy.__init__().
        """
        pass

    def get_raw_state(self):
        """
        Default state observation composer.

        Returns:
             and updates time-embedded environment state observation as [n, 4] numpy matrix, where:
                4 - number of signal features  == state_shape[1],
                n - time-embedding length  == state_shape[0] == <set by user>.

        Note:
            `self.raw_state` is used to render environment in `human` mode and should not be modified.

        """
        self.raw_state = np.row_stack((
            np.frombuffer(self.data.open.get(size=self.time_dim)),
            np.frombuffer(self.data.high.get(size=self.time_dim)),
            np.frombuffer(self.data.low.get(size=self.time_dim)),
            np.frombuffer(self.data.close.get(size=self.time_dim)),
        )).T

        return self.raw_state

    def get_stat_state(self):
        return np.asarray(self.norm_stat_tracker.get_state())

    def get_internal_state(self):
        stat_lines = ('value', 'unrealized_pnl', 'realized_pnl', 'cash',
                      'exposure')
        # Use smoothed values:
        x_broker = np.stack([
            np.asarray(self.broker_stat[name]) * self.internal_state_discount
            for name in stat_lines
        ],
                            axis=-1)
        # x_broker = np.gradient(x_broker, axis=-1)
        return np.clip(x_broker[:, None, :], -100, 100)

    def get_metadata_state(self):
        self.metadata['timestamp'] = np.asarray(self._get_timestamp())

        return self.metadata

    def _get_time(self):
        """
        Retrieves current time point of the episode data.

        Returns:
            datetime object
        """
        return self.data.datetime.datetime()

    def _get_timestamp(self):
        """
        Sets attr. and returns current data timestamp.

        Returns:
            POSIX timestamp
        """
        self.time_stamp = self._get_time().timestamp()

        return self.time_stamp

    def _get_broadcast_info(self):
        """
        Transmits broadcasting message.

        Returns:
            dictionary  or None
        """
        try:
            return self.get_broadcast_message()

        except AttributeError:
            return None

    def get_broadcast_message(self):
        """
        Override this.

        Returns:
            dictionary or None
        """
        return None

    def get_state(self):
        """
        Collects estimated values for every mode of observation space by calling methods from
        `collection_get_state_methods` dictionary.
        As a rule, this method should not be modified, override or implement corresponding get_[mode]_state() methods,
        defining necessary calculations and return properly shaped tensors for every space mode.

        Note:
            - 'data' refers to bt.strategy datafeeds and should be treated as such.
                Datafeed Lines that are not default to BTgymStrategy should be explicitly defined by
                __init__() or set_datalines().
        """
        # Update inner state statistic and compose state: <- moved to .next()
        # self.update_broker_stat()
        self.state = {
            key: method()
            for key, method in self.collection_get_state_methods.items()
        }
        return self.state

    def get_reward(self):
        """
        Shapes reward function as normalized single trade realized profit/loss,
        augmented with potential-based reward shaping functions in form of:
        F(s, a, s`) = gamma * FI(s`) - FI(s);
        Potential FI_1 is current normalized unrealized profit/loss.

        Paper:
            "Policy invariance under reward transformations:
             Theory and application to reward shaping" by A. Ng et al., 1999;
             http://www.robotics.stanford.edu/~ang/papers/shaping-icml99.pdf
        """

        # All sliding statistics for this step are already updated by get_state().

        # Potential-based shaping function 1:
        # based on potential of averaged profit/loss for current opened trade (unrealized p/l):
        unrealised_pnl = np.asarray(self.broker_stat['unrealized_pnl'])
        current_pos_duration = int(self.broker_stat['pos_duration'][-1])

        #self.log.warning('current_pos_duration: {}'.format(current_pos_duration))

        # We want to estimate the potential term `f = gamma * fi_prime - fi` of the currently opened position,
        # thus need to consider different cases given the skip_frame parameter:
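        # fi_1_prime looks at the most recent window (ending now), fi_1 at the window ending skip_frame
        # steps earlier; the branches below handle positions younger than one or two full windows.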
        if current_pos_duration == 0:
            # Set potential term to zero if there is no opened positions:
            f1 = 0
            fi_1_prime = 0
        else:
            if current_pos_duration < self.p.skip_frame:
                fi_1 = 0
                fi_1_prime = np.average(unrealised_pnl[-current_pos_duration:])

            elif current_pos_duration < 2 * self.p.skip_frame:
                fi_1 = np.average(
                    unrealised_pnl[-(self.p.skip_frame +
                                     current_pos_duration):-self.p.skip_frame])
                fi_1_prime = np.average(unrealised_pnl[-self.p.skip_frame:])

            else:
                fi_1 = np.average(
                    unrealised_pnl[-2 * self.p.skip_frame:-self.p.skip_frame])
                fi_1_prime = np.average(unrealised_pnl[-self.p.skip_frame:])

            # Potential term:
            f1 = self.p.gamma * fi_1_prime - fi_1

        # Main reward function: normalized realized profit/loss:
        realized_pnl = np.asarray(
            self.broker_stat['realized_pnl'])[-self.p.skip_frame:].sum()

        # Weights are subject to tune:
        self.reward = (0.1 * f1 + 1.0 *
                       realized_pnl) * self.p.reward_scale  #/ self.normalizer
        # self.reward = np.clip(self.reward, -self.p.reward_scale, self.p.reward_scale)
        self.reward = np.clip(self.reward, -1e3, 1e3)

        return self.reward

    def get_info(self):
        """
        Composes information part of environment response,
        can be any object. Override to own taste.

        Note:
            Due to the 'skip_frame' feature, the INFO part of the environment response transmitted by the server can be a list
            containing either all skipped frames' info objects, i.e. [info[-9], info[-8], ..., info[0]], or
            just the latest one, [info[0]]. This behaviour is set inside the btgym.server._BTgymAnalyzer().next() method.
        """
        return dict(
            step=self.iteration,
            time=self.data.datetime.datetime(),
            action=self.action,
            broker_message=self.broker_message,
            broker_cash=self.stats.broker.cash[0],
            broker_value=self.stats.broker.value[0],
            drawdown=self.stats.drawdown.drawdown[0],
            max_drawdown=self.stats.drawdown.maxdrawdown[0],
        )
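
    # Illustrative sketch (not part of the original strategy): since `info` may arrive either as a
    # single-item list or as one item per skipped frame, client code would typically take the last
    # entry, e.g.:
    #   obs, reward, done, info = env.step(action)
    #   last_info = info[-1]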

    def get_done(self):
        """
        Episode termination estimator,
        defines any trading-logic conditions upon which episode stop is called, e.g. <OMG! Stop it, we became too rich!>.
        It is just a structural convention method. The default implementation never triggers termination.

        Expected to return:
            tuple (<is_done, type=bool>, <message, type=str>).
        """
        return False, '-'
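
    # Illustrative sketch (not part of the original strategy): a custom termination rule can be added
    # by overriding get_done(); the override below is kept commented out to preserve the default
    # behaviour, and assumes backtrader's broker exposes `startingcash`, reusing the `target_call`
    # parameter (profit target, in percent):
    # def get_done(self):
    #     target_value = self.broker.startingcash * (1 + self.p.target_call / 100)
    #     if self.stats.broker.value[0] >= target_value:
    #         return True, 'PROFIT TARGET REACHED'
    #     return False, '-'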

    def _get_done(self):
        """
        Default episode termination method,
        checks base conditions episode stop is called upon:
            1. Reached maximum episode duration. Need to check it explicitly, because <self.is_done> flag
               is sent as part of environment response.
            2. Got '_done' signal from outside. E.g. via env.reset() method invoked by outer RL algorithm.
            3. Hit `drawdown` threshold.

        This method shouldn't be overridden or called explicitly.

        Runtime execution logic is:
            terminate episode if:
                get_done() returned (True, 'something')
                OR
                ANY _get_done() default condition is met.
        """
        if not self.is_done_enabled:
            # Episode is on its way,
            # apply base episode termination rules:
            is_done_rules = [
                # Are we approaching the end of the episode?:
                (self.iteration >= \
                 self.data.numrecords - self.inner_embedding - self.p.skip_frame - self.steps_till_is_done,
                 'END OF DATA'),
                # Any money left?:
                (self.stats.drawdown.maxdrawdown[0] >= self.p.drawdown_call, 'DRAWDOWN CALL'),
            ]
            # Append custom get_done() results, if any:
            is_done_rules += [self.get_done()]

            # Sweep through rules:
            for (condition, message) in is_done_rules:
                if condition:
                    # Start episode termination countdown for clean exit:
                    # to forcefully execute the final `close` order and compute a proper reward,
                    # we need to make `steps_till_is_done` more steps before the `is_done` flag can be safely raised:
                    self.is_done_enabled = True
                    self.broker_message += message
                    self.final_message = message
                    self.order = self.close()
                    self.log.debug(
                        'Episode countdown started at: {}, {}, r:{}'.format(
                            self.iteration, message, self.reward))

        else:
            # Now in episode termination phase,
            # just keep hitting `Close` button:
            self.steps_till_is_done -= 1
            self.broker_message = 'CLOSE, {}'.format(self.final_message)
            self.order = self.close()
            self.log.debug('Episode countdown contd. at: {}, {}, r:{}'.format(
                self.iteration, self.broker_message, self.reward))

        if self.steps_till_is_done <= 0:
            # Now we're done, terminate:
            self.is_done = True

        return self.is_done

    def notify_order(self, order):
        """
        Shamelessly taken from backtrader tutorial.
        TODO: better multi data support
        """
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.broker_message = 'BUY executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'. \
                    format(order.executed.price,
                           order.executed.value,
                           order.executed.comm)
                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm

            else:  # Sell
                self.broker_message = 'SELL executed,\nPrice: {:.5f}, Cost: {:.4f}, Comm: {:.4f}'. \
                    format(order.executed.price,
                           order.executed.value,
                           order.executed.comm)
            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.broker_message = 'ORDER FAILED with status: ' + str(
                order.getstatusname())
            # Raise the order_failed flag; get_reward() will [hopefully] use and reset it:
            self.order_failed += 1
        # self.log.warning('BM: {}'.format(self.broker_message))
        self.order = None

    def _next_discrete(self, action):
        """
        Default implementation for discrete actions.
        Note that orders can be submitted only for data_lines in action_space (assets).

        Args:
            action:     dict, string encoding of btgym.spaces.ActionDictSpace

        """
        for key, single_action in action.items():
            # Simple action-to-order logic:
            if single_action == 'hold' or self.is_done_enabled:
                pass
            elif single_action == 'buy':
                self.order = self.buy(data=key, size=self.p.order_size[key])
                self.broker_message = 'new {}_BUY created; '.format(
                    key) + self.broker_message
            elif single_action == 'sell':
                self.order = self.sell(data=key, size=self.p.order_size[key])
                self.broker_message = 'new {}_SELL created; '.format(
                    key) + self.broker_message
            elif single_action == 'close':
                self.order = self.close(data=key)
                self.broker_message = 'new {}_CLOSE created; '.format(
                    key) + self.broker_message

        # Somewhere after this point, server-side _BTgymAnalyzer() is exchanging information with environment wrapper,
        # obtaining <self.action> , composing and sending <state,reward,done,info> etc... never mind.
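
        # Illustrative note (assumption): for a single-asset setup the incoming `action` dict is
        # expected to look like {'default_asset': 'buy'}, so the loop above issues at most one
        # order per asset key present in the action space.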

    def _next_target_percent(self, action):
        """
        Uses the `order_target_percent` method to rebalance assets to given ratios. Expects the action for every asset to be
        a float scalar in [0, 1], with actions summing to 1 over all assets (including the base one).
        Note that the action for the base asset (cash) is ignored.
        For details refer to: https://www.backtrader.com/docu/order_target/order_target.html
        """
        # TODO 1: filter similar consecutive actions to prevent issuing excessive orders, e.g. by DKL on two consecutive ones
        # TODO 2: action discretisation at the execution level
        for asset in self.p.asset_names:
            # Scale down asset positions (factor 0.9) to keep a margin reserve:
            single_action = round(float(action[asset]) * 0.9, 2)
            self.order = self.order_target_percent(data=asset,
                                                   target=single_action)
            self.broker_message += ' new {}->{:1.0f}% created; '.format(
                asset, single_action * 100)
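
        # Illustrative note (assumption): here the incoming `action` is expected to map every asset,
        # including the base cash asset, to a portfolio fraction, with values summing to 1 over all
        # keys, e.g. {'default_cash': 0.5, 'default_asset': 0.5}; only the non-cash entries reach
        # order_target_percent() above.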
Example #30
0
class UPDMStrategy(BaseStrategy5):
    """
    Test TimeSeriesModel decomposition.
    """
    time_dim = 128
    avg_period = 90
    portfolio_actions = ('hold', 'buy', 'sell', 'close')

    features_parameters = None
    num_features = 3
    params = dict(
        state_shape={
            'external': spaces.Box(low=-10, high=10, shape=(time_dim, 1, num_features), dtype=np.float32),
            'internal': spaces.Box(low=-2, high=2, shape=(avg_period, 1, 6), dtype=np.float32),
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'trial_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'trial_type': spaces.Box(
                        shape=(),
                        low=0,
                        high=1,
                        dtype=np.uint32
                    ),
                    'sample_num': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'first_row': spaces.Box(
                        shape=(),
                        low=0,
                        high=10 ** 10,
                        dtype=np.uint32
                    ),
                    'timestamp': spaces.Box(
                        shape=(),
                        low=0,
                        high=np.finfo(np.float64).max,
                        dtype=np.float64
                    ),
                }
            )
        },
        data_model_params=dict(
            alpha=.001,
            stat_alpha=.0001,
            filter_alpha=.05,
            max_length=time_dim * 2,
            analyzer_window=10,
            analyzer_grouping=[[0, 1], [1, 2], [2, 3], [3, None]],
        ),
        cash_name='default_cash',
        asset_names=['default_asset'],
        start_cash=None,
        commission=None,
        slippage=None,
        leverage=1.0,
        gamma=0.99,  # fi_gamma, should match MDP gamma decay
        reward_scale=1,  # reward multiplier
        drawdown_call=10,  # finish episode when hitting drawdown threshold, in percent.
        target_call=10,  # finish episode when reaching profit target, in percent.
        dataset_stat=None,  # Summary descriptive statistics for entire dataset and
        episode_stat=None,  # current episode. Got updated by server.
        time_dim=time_dim,  # time embedding period
        avg_period=avg_period,  # number of time steps reward estimation statistics are averaged over
        features_parameters=features_parameters,
        num_features=num_features,
        metadata={},
        broadcast_message={},
        trial_stat=None,
        trial_metadata=None,
        portfolio_actions=portfolio_actions,
        skip_frame=1,  # number of environment steps to skip before returning next environment response
        order_size=None,
        initial_action=None,
        initial_portfolio_action=None,
        state_int_scale=1,
        state_ext_scale=1,
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.data_model = PriceModel(**self.p.data_model_params)

    def set_datalines(self):
        initial_time_period = self.p.time_dim
        self.data.dim_sma = btind.SimpleMovingAverage(
            self.datas[0],
            period=initial_time_period
        )
        self.data.dim_sma.plotinfo.plot = False

    def nextstart(self):
        self.inner_embedding = self.data.close.buflen()
        self.log.debug('Inner time embedding: {}'.format(self.inner_embedding))
        self.data_model.reset(np.asarray(self.data.get(size=self.inner_embedding)))
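
        # Illustrative note (assumption): PriceModel.reset() is expected to (re)initialize the model
        # from the initial price history of length `inner_embedding` collected so far.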

    def get_external_state(self):
        x_upd = np.asarray(self.data.get(size=self.p.skip_frame))
        self.data_model.update(x_upd)

        x_ssa = self.data_model.transform(size=self.p.time_dim).T

        # Gradient along features axis:
        # dx = np.gradient(x_ssa, axis=-1)
        #
        # # Add up: gradient  along time axis:
        # # dx2 = np.gradient(dx, axis=0)
        #
        # # TODO: different conv. encoders for these two types of features:
        # x = np.concatenate([x_ssa_bank, dx], axis=-1)

        # Crop outliers:
        x_ssa = np.clip(x_ssa, -10, 10)
        return x_ssa[:, None, :-1]
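
        # Illustrative note (assumption): data_model.transform() presumably yields num_features + 1
        # SSA components; after transposing, dropping the last component via [..., :-1] leaves a
        # tensor matching state_shape['external'] == (time_dim, 1, num_features).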

    def get_internal_state(self):

        x_broker = np.concatenate(
            [
                np.asarray(self.broker_stat['value'])[..., None],
                np.asarray(self.broker_stat['unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['total_unrealized_pnl'])[..., None],
                np.asarray(self.broker_stat['realized_pnl'])[..., None],
                np.asarray(self.broker_stat['cash'])[..., None],
                np.asarray(self.broker_stat['exposure'])[..., None],
            ],
            axis=-1
        )
        x_broker = tanh(np.gradient(x_broker, axis=-1) * self.p.state_int_scale)
        return x_broker[:, None, :]
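
        # Illustrative note: the six broker statistics above, each of length `avg_period`, are stacked
        # along the last axis, so the returned tensor matches state_shape['internal'] == (avg_period, 1, 6).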