Esempio n. 1
0
    def _get_expected_imbalance(self, window, imbalance_array):
        """
        Calculate the expected imbalance as defined on page 31 and 32.
        :param window: EWMA window for calculation
        :param imbalance_array: numpy array of imbalances [buy, sell]
        :return: expected_buy_proportion and expected_sell_proportion
        """
        if len(imbalance_array['buy']) < self.exp_num_ticks_init:
            # Waiting for array to fill for ewma
            ewma_window = np.nan
        else:
            # ewma window can be either the window specified in a function call
            # or it is len of imbalance_array if window > len(imbalance_array)
            ewma_window = int(min(len(imbalance_array), window))

        if np.isnan(ewma_window):
            exp_buy_proportion, exp_sell_proportion = np.nan, np.nan
        else:
            buy_sample = np.array(imbalance_array['buy'][-ewma_window:],
                                  dtype=float)
            sell_sample = np.array(imbalance_array['sell'][-ewma_window:],
                                   dtype=float)
            exp_buy_proportion = ewma(buy_sample, window=ewma_window)[-1]
            exp_sell_proportion = ewma(sell_sample, window=ewma_window)[-1]

        return exp_buy_proportion, exp_sell_proportion
Esempio n. 2
0
    def _get_expected_imbalance(self,
                                array: list,
                                window: int,
                                warm_up: bool = False):
        """
        Advances in Financial Machine Learning, page 29.

        Calculates the expected imbalance: 2P[b_t=1]-1, using a EWMA.

        :param array: (list) of imbalances
        :param window: (int) EWMA window for calculation
        :parawm warm_up: (bool) flag of whether warm up period passed
        :return: expected_imbalance: (np.ndarray) 2P[b_t=1]-1, approximated using a EWMA
        """
        if len(array) < self.thresholds['exp_num_ticks'] and warm_up is True:
            # Waiting for array to fill for ewma
            ewma_window = np.nan
        else:
            # ewma window can be either the window specified in a function call
            # or it is len of imbalance_array if window > len(imbalance_array)
            ewma_window = int(min(len(array), window))

        if np.isnan(ewma_window):
            # return nan, wait until len(self.imbalance_array) >= self.exp_num_ticks_init
            expected_imbalance = np.nan
        else:
            expected_imbalance = ewma(np.array(array[-ewma_window:],
                                               dtype=float),
                                      window=ewma_window)[-1]

        return expected_imbalance
Esempio n. 3
0
    def _get_expected_imbalance(self, window: int):
        """
        Calculate the expected imbalance: 2P[b_t=1]-1, using a EWMA, pg 29
        :param window: (int) EWMA window for calculation
        :return: expected_imbalance: (np.ndarray) 2P[b_t=1]-1, approximated using a EWMA
        """
        if len(self.imbalance_tick_statistics['imbalance_array']
               ) < self.thresholds['exp_num_ticks']:
            # Waiting for array to fill for ewma
            ewma_window = np.nan
        else:
            # ewma window can be either the window specified in a function call
            # or it is len of imbalance_array if window > len(imbalance_array)
            ewma_window = int(
                min(len(self.imbalance_tick_statistics['imbalance_array']),
                    window))

        if np.isnan(ewma_window):
            # return nan, wait until len(self.imbalance_array) >= self.exp_num_ticks_init
            expected_imbalance = np.nan
        else:
            expected_imbalance = ewma(
                np.array(self.imbalance_tick_statistics['imbalance_array']
                         [-ewma_window:],
                         dtype=float),
                window=ewma_window)[-1]

        return expected_imbalance
Esempio n. 4
0
 def _get_exp_num_ticks(self):
     """
     Estimate the expected number of ticks per bar from previously formed bars.

     :return: last value of the EWMA over the tick counts of the latest
              self.num_prev_bars bars, clamped to the range
              [self.min_exp_num_ticks, self.max_exp_num_ticks]
     """
     ticks_per_bar = self.imbalance_tick_statistics['num_ticks_bar']
     recent = np.array(ticks_per_bar[-self.num_prev_bars:], dtype=float)
     smoothed = ewma(recent, self.num_prev_bars)[-1]

     # Apply the lower bound first, then the upper bound
     floored = max(smoothed, self.min_exp_num_ticks)
     return min(floored, self.max_exp_num_ticks)
    def _extract_bars(self, data):
        """
        For loop which compiles the various imbalance bars: dollar, volume, or tick.

        :param data: (DataFrame) Contains 3 columns - date_time, price, and volume.
        :return: (List) of bars built using the current batch.
        """
        cum_ticks, cum_volume, cum_theta, high_price, low_price = self._update_counters()

        # Iterate over rows
        list_bars = []
        for row in data.values:
            # Set variables
            cum_ticks += 1
            date_time = row[0]
            price = np.float(row[1])
            volume = row[2]
            cum_volume += volume

            # Update high low prices
            high_price, low_price = self._update_high_low(
                high_price, low_price, price)

            # Imbalance calculations
            signed_tick = self._apply_tick_rule(price)
            imbalance = self._get_imbalance(price, signed_tick, volume)
            self.imbalance_array.append(imbalance)
            cum_theta += imbalance

            if not list_bars and np.isnan(self.expected_imbalance):
                self.expected_imbalance = self._get_expected_imbalance(
                    self.exp_num_ticks, self.imbalance_array)

            # Update cache
            self._update_cache(date_time, price, low_price,
                               high_price, cum_ticks, cum_volume, cum_theta)

            # Check expression for possible bar generation
            if np.abs(cum_theta) > self.exp_num_ticks * np.abs(self.expected_imbalance):
                self._create_bars(date_time, price,
                                  high_price, low_price, list_bars)

                self.num_ticks_bar.append(cum_ticks)
                # Expected number of ticks based on formed bars
                self.exp_num_ticks = ewma(np.array(
                    self.num_ticks_bar[-self.num_prev_bars:], dtype=float), self.num_prev_bars)[-1]

                self.expected_imbalance = self._get_expected_imbalance(
                    self.exp_num_ticks * self.num_prev_bars, self.imbalance_array)
                # Reset counters
                cum_ticks, cum_volume, cum_theta = 0, 0, 0
                high_price, low_price = -np.inf, np.inf
                self.cache = []

                # Update cache after bar generation (exp_num_ticks was changed after bar generation)
                self._update_cache(date_time, price, low_price,
                                   high_price, cum_ticks, cum_volume, cum_theta)
        return list_bars
Esempio n. 6
0
    def test_ewma(self):
        """
        Tests the ewma function on the Price column of the sample data.
        """
        prices = np.array(pd.read_csv(self.path).Price.values, dtype=float)
        smoothed = ewma(prices, window=20)

        # The output must have the same shape as the input
        self.assertEqual(smoothed.shape, prices.shape)
        # The first smoothed value equals the first input value
        self.assertEqual(smoothed[0], prices[0])
        # The second value must match 1100.00 within a tolerance of 1e-5
        self.assertLess(abs(smoothed[1] - 1100.00), 1e-5)
    def _get_expected_imbalance(self, window, imbalance_array):
        """
        Calculate the expected imbalance: 2P[b_t=1]-1, using a EWMA, pg 29
        :param window: EWMA window for calculation
        :param imbalance_array: (numpy array) of the tick imbalances
        :return: expected_imbalance: 2P[b_t=1]-1, approximated using a EWMA
        """
        if len(imbalance_array) < self.exp_num_ticks_init:
            # Waiting for array to fill for ewma
            ewma_window = np.nan
        else:
            # ewma window can be either the window specified in a function call
            # or it is len of imbalance_array if window > len(imbalance_array)
            ewma_window = int(min(len(imbalance_array), window))

        if np.isnan(ewma_window):
            # return nan, wait until len(imbalance_array) >= self.exp_num_ticks_init
            expected_imbalance = np.nan
        else:
            expected_imbalance = ewma(
                np.array(imbalance_array[-ewma_window:], dtype=float), window=ewma_window)[-1]

        return expected_imbalance
Esempio n. 8
0
    def _extract_bars(self, data: Tuple[list, np.ndarray]) -> list:
        """
        For loop which compiles the various run bars: dollar, volume, or tick.

        :param data: (list or np.ndarray) Contains 3 columns - date_time, price, and volume.
        :return: (list) of bars built using the current batch.
        """

        # Iterate over rows
        list_bars = []
        for row in data:
            # Set variables
            date_time = row[0]
            self.tick_num += 1
            price = np.float(row[1])
            volume = row[2]
            dollar_value = price * volume
            signed_tick = self._apply_tick_rule(price)

            if self.open_price is None:
                self.open_price = price

            # Update high low prices
            self.high_price, self.low_price = self._update_high_low(price)

            # Bar statistics calculations
            self.cum_statistics['cum_ticks'] += 1
            self.cum_statistics['cum_dollar_value'] += dollar_value
            self.cum_statistics['cum_volume'] += volume
            if signed_tick == 1:
                self.cum_statistics['cum_buy_volume'] += volume

            # Imbalance calculations
            imbalance = self._get_imbalance(price, signed_tick, volume)

            if imbalance > 0:
                self.imbalance_tick_statistics['imbalance_array_buy'].append(
                    imbalance)
                self.thresholds['cum_theta_buy'] += imbalance
                self.thresholds['buy_ticks_num'] += 1
            elif imbalance < 0:
                self.imbalance_tick_statistics['imbalance_array_sell'].append(
                    abs(imbalance))
                self.thresholds['cum_theta_sell'] += abs(imbalance)

            self.warm_up_flag = np.isnan([
                self.thresholds['exp_imbalance_buy'],
                self.thresholds['exp_imbalance_sell']
            ]).any(
            )  # Flag indicating that one of imbalances is not counted (warm-up)

            # Get expected imbalance for the first time, when num_ticks_init passed
            if not list_bars and self.warm_up_flag:
                self.thresholds[
                    'exp_imbalance_buy'] = self._get_expected_imbalance(
                        self.imbalance_tick_statistics['imbalance_array_buy'],
                        self.expected_imbalance_window,
                        warm_up=True)
                self.thresholds[
                    'exp_imbalance_sell'] = self._get_expected_imbalance(
                        self.imbalance_tick_statistics['imbalance_array_sell'],
                        self.expected_imbalance_window,
                        warm_up=True)

                if bool(
                        np.isnan([
                            self.thresholds['exp_imbalance_buy'],
                            self.thresholds['exp_imbalance_sell']
                        ]).any()) is False:
                    self.thresholds['exp_buy_ticks_proportion'] = self.thresholds['buy_ticks_num'] / \
                                                                  self.cum_statistics[
                                                                      'cum_ticks']

            if self.bars_thresholds is not None:
                self.thresholds['timestamp'] = date_time
                self.bars_thresholds.append(dict(self.thresholds))

            # Check expression for possible bar generation
            max_proportion = max(
                self.thresholds['exp_imbalance_buy'] *
                self.thresholds['exp_buy_ticks_proportion'],
                self.thresholds['exp_imbalance_sell'] *
                (1 - self.thresholds['exp_buy_ticks_proportion']))

            # Check expression for possible bar generation
            max_theta = max(self.thresholds['cum_theta_buy'],
                            self.thresholds['cum_theta_sell'])
            if max_theta > self.thresholds[
                    'exp_num_ticks'] * max_proportion and not np.isnan(
                        max_proportion):
                self._create_bars(date_time, price, self.high_price,
                                  self.low_price, list_bars)

                self.imbalance_tick_statistics['num_ticks_bar'].append(
                    self.cum_statistics['cum_ticks'])
                self.imbalance_tick_statistics['buy_ticks_proportion'].append(
                    self.thresholds['buy_ticks_num'] /
                    self.cum_statistics['cum_ticks'])

                # Expected number of ticks based on formed bars
                self.thresholds['exp_num_ticks'] = self._get_exp_num_ticks()

                # Expected buy ticks proportion based on formed bars
                exp_buy_ticks_proportion = ewma(
                    np.array(
                        self.imbalance_tick_statistics['buy_ticks_proportion']
                        [-self.num_prev_bars:],
                        dtype=float), self.num_prev_bars)[-1]
                self.thresholds[
                    'exp_buy_ticks_proportion'] = exp_buy_ticks_proportion

                # Get expected imbalance
                self.thresholds[
                    'exp_imbalance_buy'] = self._get_expected_imbalance(
                        self.imbalance_tick_statistics['imbalance_array_buy'],
                        self.expected_imbalance_window)
                self.thresholds[
                    'exp_imbalance_sell'] = self._get_expected_imbalance(
                        self.imbalance_tick_statistics['imbalance_array_sell'],
                        self.expected_imbalance_window)

                # Reset counters
                self._reset_cache()

        return list_bars
Esempio n. 9
0
    def _extract_bars(self, data):
        """
        For loop which compiles the various run bars: dollar, volume, or tick.

        :param data: (DataFrame) Contains 3 columns - date_time, price, and volume.
        :return: (List) of bars built using the current batch.
        """
        cum_ticks, buy_ticks, cum_volume, cum_theta_buy, cum_theta_sell, high_price, low_price = self._update_counters(
        )

        # Iterate over rows
        list_bars = []
        for row in data.values:
            # Set variables
            cum_ticks += 1
            date_time = row[0]
            price = np.float(row[1])
            volume = row[2]
            cum_volume += volume

            # Update high low prices
            high_price, low_price = self._update_high_low(
                high_price, low_price, price)

            # Imbalance calculations
            signed_tick = self._apply_tick_rule(price)
            imbalance = self._get_imbalance(price, signed_tick, volume)

            if imbalance > 0:
                self.imbalance_array['buy'].append(imbalance)
                cum_theta_buy += imbalance
                buy_ticks += 1
            elif imbalance < 0:
                self.imbalance_array['sell'].append(abs(imbalance))
                cum_theta_sell += abs(imbalance)

            imbalances_are_counted_flag = np.isnan([
                self.exp_imbalance['buy'], self.exp_imbalance['sell']
            ]).any(
            )  # flag indicating that both buy and sell imbalances are counted
            if not list_bars and imbalances_are_counted_flag:
                self.exp_imbalance['buy'] = self._get_expected_imbalance(
                    self.exp_num_ticks, self.imbalance_array['buy'])
                self.exp_imbalance['sell'] = self._get_expected_imbalance(
                    self.exp_num_ticks, self.imbalance_array['sell'])
                if bool(
                        np.isnan([
                            self.exp_imbalance['buy'],
                            self.exp_imbalance['sell']
                        ]).any()) is False:
                    self.exp_buy_ticks_proportion = buy_ticks / cum_ticks
                    cum_theta_buy, cum_theta_sell = 0, 0  # reset theta after warm-up period
                    self.warm_up = False

            # Update cache
            self._update_cache(date_time, price, low_price, high_price,
                               cum_theta_sell, cum_theta_buy, cum_ticks,
                               buy_ticks, cum_volume)

            # Check expression for possible bar generation
            max_proportion = max(
                self.exp_imbalance['buy'] * self.exp_buy_ticks_proportion,
                self.exp_imbalance['sell'] *
                (1 - self.exp_buy_ticks_proportion))
            if max(
                    cum_theta_buy, cum_theta_sell
            ) > self.exp_num_ticks * max_proportion and self.warm_up is False:
                self._create_bars(date_time, price, high_price, low_price,
                                  list_bars)

                self.num_ticks_bar['cum_ticks'].append(cum_ticks)
                self.num_ticks_bar['buy_proportion'].append(buy_ticks /
                                                            cum_ticks)
                # Expected number of ticks based on formed bars
                self.exp_num_ticks = ewma(
                    np.array(
                        self.num_ticks_bar['cum_ticks'][-self.num_prev_bars:],
                        dtype=float), self.num_prev_bars)[-1]
                # Expected buy ticks proportion based on formed bars
                self.exp_buy_ticks_proportion = \
                    ewma(np.array(self.num_ticks_bar['buy_proportion'][-self.num_prev_bars:], dtype=float),
                         self.num_prev_bars)[-1]
                self.exp_imbalance['buy'] = self._get_expected_imbalance(
                    self.exp_num_ticks * self.num_prev_bars,
                    self.imbalance_array['buy'])
                self.exp_imbalance['sell'] = self._get_expected_imbalance(
                    self.exp_num_ticks * self.num_prev_bars,
                    self.imbalance_array['sell'])

                # Reset counters
                cum_ticks, buy_ticks, cum_volume, cum_theta_buy, cum_theta_sell = 0, 0, 0, 0, 0
                high_price, low_price = -np.inf, np.inf
                self.cache = []

                # Update cache after bar generation (exp_num_ticks was changed after bar generation)
                self._update_cache(date_time, price, low_price, high_price,
                                   cum_theta_sell, cum_theta_buy, cum_ticks,
                                   buy_ticks, cum_volume)
        return list_bars