def _get_bar_features(self, date_time: pd.Timestamp, list_bars: list) -> list:
    """
    Calculate inter-bar features: lambdas, entropies, avg_tick_size, vwap

    The feature row is appended to ``list_bars`` in place and is also returned,
    so the annotated/documented return value is actually delivered.

    Row layout: date_time, average tick size, tick rule sum, VWAP, Kyle lambda
    values, Amihud lambda values, Hasbrouck lambda values, tick-rule entropies
    (Shannon, plug-in, Lempel-Ziv, Konto), then the same four entropies for the
    encoded trade sizes (only if ``self.volume_encoding`` is set) and for the
    encoded log returns (only if ``self.pct_encoding`` is set).

    :param date_time: (pd.Timestamp) when bar was formed
    :param list_bars: (list) of previously formed bars; the new row is appended to it
    :return: (list) of inter-bar features (the same object appended to ``list_bars``)
    """
    # Estimators are applied in this fixed order to every encoded message.
    entropy_estimators = (
        get_shannon_entropy,
        get_plug_in_entropy,
        get_lempel_ziv_entropy,
        get_konto_entropy,
    )

    def _append_entropies(message) -> None:
        # Append one entropy value per estimator for the given encoded message.
        features.extend(estimator(message) for estimator in entropy_estimators)

    # Avg tick size, tick rule sum, VWAP
    features = [
        date_time,
        get_avg_tick_size(self.trade_size),
        sum(self.tick_rule),
        vwap(self.dollar_size, self.trade_size),
    ]

    # Lambdas (each helper returns an iterable of values that is flattened into the row)
    features.extend(
        get_trades_based_kyle_lambda(self.price_diff, self.trade_size, self.tick_rule))  # Kyle lambda
    features.extend(
        get_trades_based_amihud_lambda(self.log_ret, self.dollar_size))  # Amihud lambda
    features.extend(
        get_trades_based_hasbrouck_lambda(self.log_ret, self.dollar_size, self.tick_rule))  # Hasbrouck lambda

    # Entropy features: tick rule always, volume / returns only when an encoding is configured
    _append_entropies(encode_tick_rule_array(self.tick_rule))

    if self.volume_encoding is not None:
        _append_entropies(encode_array(self.trade_size, self.volume_encoding))

    if self.pct_encoding is not None:
        _append_entropies(encode_array(self.log_ret, self.pct_encoding))

    list_bars.append(features)
    return features
def test_entropy_calculations(self):
    """
    Check the entropy estimators against reference values for a short binary message.
    """
    text = '11100001'
    bits = [1, 1, 1, 0, 0, 0, 0, 1]

    shannon_value = get_shannon_entropy(text)
    plug_in_from_text = get_plug_in_entropy(text, word_length=1)
    plug_in_from_bits = get_plug_in_entropy(bits, word_length=1)
    lempel_ziv_value = get_lempel_ziv_entropy(text)
    konto_value = get_konto_entropy(text)

    # The string and the list form of the same message must give the same estimate.
    self.assertEqual(plug_in_from_text, plug_in_from_bits)

    # (computed value, reference value) pairs, all checked with the same tolerance.
    reference_checks = (
        (shannon_value, 1.0),
        (lempel_ziv_value, 0.625),
        (plug_in_from_text, 0.985),
        (konto_value, 0.9682),
    )
    for computed, reference in reference_checks:
        self.assertAlmostEqual(computed, reference, delta=1e-3)

    # Konto entropy boundary conditions
    konto_with_window = get_konto_entropy(text, 2)
    # Result is intentionally not asserted; the call only exercises _match_length.
    _match_length('1101111', 2, 3)
    self.assertAlmostEqual(konto_with_window, 0.8453, delta=1e-4)

    # A one-character message has zero entropy.
    single_char_entropy = get_konto_entropy('a')
    self.assertEqual(single_char_entropy, 0)