def _get_expected_imbalance(self, window, imbalance_array):
    """
    Calculate the expected imbalance as defined on page 31 and 32.

    The buy and the sell side are estimated independently: each estimate is the last value
    of a EWMA computed over the most recent observations of that side.

    :param window: EWMA window for calculation
    :param imbalance_array: container indexed by 'buy' and 'sell', each holding a sequence
                            of imbalances
    :return: expected_buy_proportion and expected_sell_proportion; both are np.nan while
             fewer than self.exp_num_ticks_init buy imbalances have been collected
    """
    if len(imbalance_array['buy']) < self.exp_num_ticks_init:
        # Waiting for array to fill for ewma
        return np.nan, np.nan

    def _latest_ewma(series):
        # Last EWMA value of one side, or nan when that side has no observations yet.
        if len(series) == 0:
            return np.nan
        # The window is clamped by the number of observations of THIS side. Using
        # len(imbalance_array) would count the 'buy'/'sell' keys of a mapping (always 2)
        # instead of the observations.
        ewma_window = int(min(len(series), window))
        sample = np.array(series[-ewma_window:], dtype=float)
        return ewma(sample, window=ewma_window)[-1]

    exp_buy_proportion = _latest_ewma(imbalance_array['buy'])
    exp_sell_proportion = _latest_ewma(imbalance_array['sell'])
    return exp_buy_proportion, exp_sell_proportion
def _get_expected_imbalance(self, array: list, window: int, warm_up: bool = False):
    """
    Advances in Financial Machine Learning, page 29.

    Calculates the expected imbalance: 2P[b_t=1]-1, using a EWMA.

    :param array: (list) of imbalances
    :param window: (int) EWMA window for calculation
    :param warm_up: (bool) when truthy, np.nan is returned until `array` holds at least
                    self.thresholds['exp_num_ticks'] observations
    :return: expected_imbalance: last value of the EWMA over the most recent imbalances,
             or np.nan while warming up or when `array` is empty
    """
    num_observations = len(array)

    if num_observations == 0:
        # Nothing to average: a zero window would slice the whole (empty) array and
        # indexing the EWMA result with [-1] would fail.
        return np.nan

    # Truthiness (instead of `is True`) so that numpy booleans are honoured as well.
    if warm_up and num_observations < self.thresholds['exp_num_ticks']:
        # Waiting for array to fill for ewma
        return np.nan

    # ewma window can be either the window specified in a function call
    # or it is len of array if window > len(array)
    ewma_window = int(min(num_observations, window))
    recent = np.array(array[-ewma_window:], dtype=float)
    return ewma(recent, window=ewma_window)[-1]
def _get_expected_imbalance(self, window: int):
    """
    Estimate the expected imbalance, 2P[b_t=1]-1, with a EWMA (pg 29).

    Reads the tick imbalances from self.imbalance_tick_statistics['imbalance_array'].

    :param window: (int) EWMA window for calculation
    :return: expected_imbalance: last EWMA value over the most recent imbalances, or np.nan
             while fewer than self.thresholds['exp_num_ticks'] imbalances are stored
    """
    tick_imbalances = self.imbalance_tick_statistics['imbalance_array']

    # Still filling the array: nothing to estimate yet.
    if len(tick_imbalances) < self.thresholds['exp_num_ticks']:
        return np.nan

    # Use the requested window, shrunk to the number of stored imbalances if needed.
    span = int(min(len(tick_imbalances), window))
    recent = np.array(tick_imbalances[-span:], dtype=float)
    return ewma(recent, window=span)[-1]
def _get_exp_num_ticks(self):
    """
    Compute the expected number of ticks per bar from the previously formed bars.

    The tick counts of the last self.num_prev_bars bars are smoothed with a EWMA and the
    latest value is bounded below by self.min_exp_num_ticks and above by
    self.max_exp_num_ticks.

    :return: bounded expected number of ticks
    """
    bar_lengths = np.array(
        self.imbalance_tick_statistics['num_ticks_bar'][-self.num_prev_bars:], dtype=float)
    smoothed = ewma(bar_lengths, self.num_prev_bars)[-1]

    # Apply the lower bound first, then the upper bound.
    floored = max(smoothed, self.min_exp_num_ticks)
    return min(floored, self.max_exp_num_ticks)
def _extract_bars(self, data):
    """
    For loop which compiles the various imbalance bars: dollar, volume, or tick.

    Counters are restored through self._update_counters() and pushed back through
    self._update_cache() after every tick.

    :param data: (DataFrame) Contains 3 columns - date_time, price, and volume.
    :return: (List) of bars built using the current batch.
    """
    cum_ticks, cum_volume, cum_theta, high_price, low_price = self._update_counters()

    # Iterate over rows
    list_bars = []
    for row in data.values:
        # Set variables
        cum_ticks += 1
        date_time = row[0]
        # Builtin float: the np.float alias was removed from NumPy (1.24).
        price = float(row[1])
        volume = row[2]
        cum_volume += volume

        # Update high low prices
        high_price, low_price = self._update_high_low(high_price, low_price, price)

        # Imbalance calculations
        signed_tick = self._apply_tick_rule(price)
        imbalance = self._get_imbalance(price, signed_tick, volume)
        self.imbalance_array.append(imbalance)
        cum_theta += imbalance

        # Before the first bar of this batch, keep trying to obtain the initial expected
        # imbalance; it stays nan until enough ticks have been collected.
        if not list_bars and np.isnan(self.expected_imbalance):
            self.expected_imbalance = self._get_expected_imbalance(
                self.exp_num_ticks, self.imbalance_array)

        # Update cache
        self._update_cache(date_time, price, low_price, high_price,
                           cum_ticks, cum_volume, cum_theta)

        # Check expression for possible bar generation
        # (the comparison is False while the expected imbalance is nan)
        if np.abs(cum_theta) > self.exp_num_ticks * np.abs(self.expected_imbalance):
            self._create_bars(date_time, price, high_price, low_price, list_bars)

            self.num_ticks_bar.append(cum_ticks)
            # Expected number of ticks based on formed bars
            self.exp_num_ticks = ewma(
                np.array(self.num_ticks_bar[-self.num_prev_bars:], dtype=float),
                self.num_prev_bars)[-1]

            self.expected_imbalance = self._get_expected_imbalance(
                self.exp_num_ticks * self.num_prev_bars, self.imbalance_array)

            # Reset counters
            cum_ticks, cum_volume, cum_theta = 0, 0, 0
            high_price, low_price = -np.inf, np.inf
            self.cache = []

            # Update cache after bar generation (exp_num_ticks was changed after bar generation)
            self._update_cache(date_time, price, low_price, high_price,
                               cum_ticks, cum_volume, cum_theta)
    return list_bars
def test_ewma(self):
    """
    Tests the ewma implementation on the Price column of the sample file at self.path.
    """
    test_sample = pd.read_csv(self.path)
    price_arr = np.array(test_sample.Price.values, dtype=float)
    ewma_res = ewma(price_arr, window=20)

    # Assert output array length equals input array length
    # (assertEqual reports both shapes on failure, unlike assertTrue(a == b))
    self.assertEqual(ewma_res.shape, price_arr.shape)
    # Assert the first value of ewma equals to input array value
    self.assertEqual(ewma_res[0], price_arr[0])
    # Assert next value check with tolerance of 1e-5
    # NOTE(review): 1100.00 is presumably derived from the sample file - confirm if the
    # fixture changes.
    self.assertLess(abs(ewma_res[1] - 1100.00), 1e-5)
def _get_expected_imbalance(self, window, imbalance_array):
    """
    Estimate the expected imbalance, 2P[b_t=1]-1, with a EWMA (pg 29).

    :param window: EWMA window for calculation
    :param imbalance_array: (numpy array) of the tick imbalances
    :return: expected_imbalance: last EWMA value over the most recent imbalances, or np.nan
             while fewer than self.exp_num_ticks_init imbalances are available
    """
    num_observations = len(imbalance_array)

    # Still filling the array: nothing to estimate yet.
    if num_observations < self.exp_num_ticks_init:
        return np.nan

    # Use the requested window, shrunk to the number of available imbalances if needed.
    span = int(min(num_observations, window))
    recent = np.array(imbalance_array[-span:], dtype=float)
    return ewma(recent, window=span)[-1]
def _extract_bars(self, data: Tuple[list, np.ndarray]) -> list:
    """
    For loop which compiles the various run bars: dollar, volume, or tick.

    Per tick, the running statistics in self.cum_statistics, self.thresholds and
    self.imbalance_tick_statistics are updated; a bar is created when the larger of the
    cumulative buy/sell thetas exceeds the expected threshold.

    :param data: (list or np.ndarray) Contains 3 columns - date_time, price, and volume.
    :return: (list) of bars built using the current batch.
    """
    # NOTE(review): the annotation reads Tuple[list, np.ndarray] while the docstring says
    # "list or np.ndarray" - presumably Union was intended; confirm before changing it.

    # Iterate over rows
    list_bars = []
    for row in data:
        # Set variables
        date_time = row[0]
        self.tick_num += 1
        # Builtin float: the np.float alias was removed from NumPy (1.24).
        price = float(row[1])
        volume = row[2]
        dollar_value = price * volume
        signed_tick = self._apply_tick_rule(price)

        if self.open_price is None:
            self.open_price = price

        # Update high low prices
        self.high_price, self.low_price = self._update_high_low(price)

        # Bar statistics calculations
        self.cum_statistics['cum_ticks'] += 1
        self.cum_statistics['cum_dollar_value'] += dollar_value
        self.cum_statistics['cum_volume'] += volume
        if signed_tick == 1:
            self.cum_statistics['cum_buy_volume'] += volume

        # Imbalance calculations
        imbalance = self._get_imbalance(price, signed_tick, volume)

        if imbalance > 0:
            self.imbalance_tick_statistics['imbalance_array_buy'].append(imbalance)
            self.thresholds['cum_theta_buy'] += imbalance
            self.thresholds['buy_ticks_num'] += 1
        elif imbalance < 0:
            self.imbalance_tick_statistics['imbalance_array_sell'].append(abs(imbalance))
            self.thresholds['cum_theta_sell'] += abs(imbalance)

        # Flag indicating that one of imbalances is not counted (warm-up)
        self.warm_up_flag = np.isnan([self.thresholds['exp_imbalance_buy'],
                                      self.thresholds['exp_imbalance_sell']]).any()

        # Get expected imbalance for the first time, when num_ticks_init passed
        if not list_bars and self.warm_up_flag:
            self.thresholds['exp_imbalance_buy'] = self._get_expected_imbalance(
                self.imbalance_tick_statistics['imbalance_array_buy'],
                self.expected_imbalance_window, warm_up=True)
            self.thresholds['exp_imbalance_sell'] = self._get_expected_imbalance(
                self.imbalance_tick_statistics['imbalance_array_sell'],
                self.expected_imbalance_window, warm_up=True)

            # Both expectations available: initialise the expected buy ticks proportion
            # from the ticks seen so far.
            if not np.isnan([self.thresholds['exp_imbalance_buy'],
                             self.thresholds['exp_imbalance_sell']]).any():
                self.thresholds['exp_buy_ticks_proportion'] = \
                    self.thresholds['buy_ticks_num'] / self.cum_statistics['cum_ticks']

        # Optionally record a snapshot of the thresholds for every tick
        if self.bars_thresholds is not None:
            self.thresholds['timestamp'] = date_time
            self.bars_thresholds.append(dict(self.thresholds))

        # Check expression for possible bar generation
        max_proportion = max(
            self.thresholds['exp_imbalance_buy'] * self.thresholds['exp_buy_ticks_proportion'],
            self.thresholds['exp_imbalance_sell'] * (1 - self.thresholds['exp_buy_ticks_proportion']))

        max_theta = max(self.thresholds['cum_theta_buy'], self.thresholds['cum_theta_sell'])
        if max_theta > self.thresholds['exp_num_ticks'] * max_proportion and not np.isnan(max_proportion):
            self._create_bars(date_time, price, self.high_price, self.low_price, list_bars)

            self.imbalance_tick_statistics['num_ticks_bar'].append(
                self.cum_statistics['cum_ticks'])
            self.imbalance_tick_statistics['buy_ticks_proportion'].append(
                self.thresholds['buy_ticks_num'] / self.cum_statistics['cum_ticks'])

            # Expected number of ticks based on formed bars
            self.thresholds['exp_num_ticks'] = self._get_exp_num_ticks()

            # Expected buy ticks proportion based on formed bars
            exp_buy_ticks_proportion = ewma(
                np.array(
                    self.imbalance_tick_statistics['buy_ticks_proportion'][-self.num_prev_bars:],
                    dtype=float),
                self.num_prev_bars)[-1]
            self.thresholds['exp_buy_ticks_proportion'] = exp_buy_ticks_proportion

            # Get expected imbalance
            self.thresholds['exp_imbalance_buy'] = self._get_expected_imbalance(
                self.imbalance_tick_statistics['imbalance_array_buy'],
                self.expected_imbalance_window)
            self.thresholds['exp_imbalance_sell'] = self._get_expected_imbalance(
                self.imbalance_tick_statistics['imbalance_array_sell'],
                self.expected_imbalance_window)

            # Reset counters
            self._reset_cache()

    return list_bars
def _extract_bars(self, data):
    """
    For loop which compiles the various run bars: dollar, volume, or tick.

    Counters are restored through self._update_counters() and pushed back through
    self._update_cache() after every tick.

    :param data: (DataFrame) Contains 3 columns - date_time, price, and volume.
    :return: (List) of bars built using the current batch.
    """
    cum_ticks, buy_ticks, cum_volume, cum_theta_buy, cum_theta_sell, high_price, low_price = \
        self._update_counters()

    # Iterate over rows
    list_bars = []
    for row in data.values:
        # Set variables
        cum_ticks += 1
        date_time = row[0]
        # Builtin float: the np.float alias was removed from NumPy (1.24).
        price = float(row[1])
        volume = row[2]
        cum_volume += volume

        # Update high low prices
        high_price, low_price = self._update_high_low(high_price, low_price, price)

        # Imbalance calculations
        signed_tick = self._apply_tick_rule(price)
        imbalance = self._get_imbalance(price, signed_tick, volume)

        if imbalance > 0:
            self.imbalance_array['buy'].append(imbalance)
            cum_theta_buy += imbalance
            buy_ticks += 1
        elif imbalance < 0:
            self.imbalance_array['sell'].append(abs(imbalance))
            cum_theta_sell += abs(imbalance)

        # True while at least one of the expected buy/sell imbalances is still nan
        expectation_missing = np.isnan([self.exp_imbalance['buy'],
                                        self.exp_imbalance['sell']]).any()

        if not list_bars and expectation_missing:
            self.exp_imbalance['buy'] = self._get_expected_imbalance(
                self.exp_num_ticks, self.imbalance_array['buy'])
            self.exp_imbalance['sell'] = self._get_expected_imbalance(
                self.exp_num_ticks, self.imbalance_array['sell'])

            if not np.isnan([self.exp_imbalance['buy'], self.exp_imbalance['sell']]).any():
                self.exp_buy_ticks_proportion = buy_ticks / cum_ticks
                cum_theta_buy, cum_theta_sell = 0, 0  # reset theta after warm-up period
                self.warm_up = False

        # Update cache
        self._update_cache(date_time, price, low_price, high_price, cum_theta_sell,
                           cum_theta_buy, cum_ticks, buy_ticks, cum_volume)

        # Check expression for possible bar generation
        max_proportion = max(
            self.exp_imbalance['buy'] * self.exp_buy_ticks_proportion,
            self.exp_imbalance['sell'] * (1 - self.exp_buy_ticks_proportion))

        if max(cum_theta_buy, cum_theta_sell) > self.exp_num_ticks * max_proportion \
                and self.warm_up is False:
            self._create_bars(date_time, price, high_price, low_price, list_bars)

            self.num_ticks_bar['cum_ticks'].append(cum_ticks)
            self.num_ticks_bar['buy_proportion'].append(buy_ticks / cum_ticks)

            # Expected number of ticks based on formed bars
            self.exp_num_ticks = ewma(
                np.array(self.num_ticks_bar['cum_ticks'][-self.num_prev_bars:], dtype=float),
                self.num_prev_bars)[-1]

            # Expected buy ticks proportion based on formed bars
            self.exp_buy_ticks_proportion = ewma(
                np.array(self.num_ticks_bar['buy_proportion'][-self.num_prev_bars:], dtype=float),
                self.num_prev_bars)[-1]

            self.exp_imbalance['buy'] = self._get_expected_imbalance(
                self.exp_num_ticks * self.num_prev_bars, self.imbalance_array['buy'])
            self.exp_imbalance['sell'] = self._get_expected_imbalance(
                self.exp_num_ticks * self.num_prev_bars, self.imbalance_array['sell'])

            # Reset counters
            cum_ticks, buy_ticks, cum_volume, cum_theta_buy, cum_theta_sell = 0, 0, 0, 0, 0
            high_price, low_price = -np.inf, np.inf
            self.cache = []

            # Update cache after bar generation (exp_num_ticks was changed after bar generation)
            self._update_cache(date_time, price, low_price, high_price, cum_theta_sell,
                               cum_theta_buy, cum_ticks, buy_ticks, cum_volume)
    return list_bars