def _get_bar_features(self, date_time: pd.Timestamp,
                      list_bars: list) -> list:
        """
        Calculate inter-bar features: lambdas, entropies, avg_tick_size, vwap

        The feature row is built in a fixed order: bar timestamp, average tick
        size, tick rule sum, VWAP, Kyle / Amihud / Hasbrouck lambda values, and
        then four entropy estimates (Shannon, plug-in, Lempel-Ziv, Konto) for
        the encoded tick rule. The same four estimates are added for the encoded
        trade sizes when self.volume_encoding is set, and for the encoded log
        returns when self.pct_encoding is set.

        The row is appended to list_bars in place and is also returned, so that
        the method honours its declared return type.

        :param date_time: (pd.Timestamp) when bar was formed
        :param list_bars: (list) of previously formed bars, extended in place with the new row
        :return: (list) of inter-bar features (the row that was appended to list_bars)
        """
        features = [date_time]

        # Avg tick size, tick rule sum, VWAP (in that order)
        features.append(get_avg_tick_size(self.trade_size))
        features.append(sum(self.tick_rule))
        features.append(vwap(self.dollar_size, self.trade_size))

        # Lambdas: each helper's result is spliced into the row element by element
        features.extend(
            get_trades_based_kyle_lambda(self.price_diff, self.trade_size,
                                         self.tick_rule))  # Kyle lambda
        features.extend(
            get_trades_based_amihud_lambda(self.log_ret,
                                           self.dollar_size))  # Amihud lambda
        features.extend(
            get_trades_based_hasbrouck_lambda(
                self.log_ret, self.dollar_size,
                self.tick_rule))  # Hasbrouck lambda

        # Entropy features: the estimator order defines the column order
        entropy_estimators = (get_shannon_entropy, get_plug_in_entropy,
                              get_lempel_ziv_entropy, get_konto_entropy)

        def _append_entropies(message):
            # One value per estimator, appended in estimator order
            features.extend(estimate(message) for estimate in entropy_estimators)

        _append_entropies(encode_tick_rule_array(self.tick_rule))

        # Volume and return entropies are optional: they need an encoding
        if self.volume_encoding is not None:
            _append_entropies(
                encode_array(self.trade_size, self.volume_encoding))

        if self.pct_encoding is not None:
            _append_entropies(encode_array(self.log_ret, self.pct_encoding))

        list_bars.append(features)
        return features
Exemple #2
0
    def test_entropy_calculations(self):
        """
        Check the entropy estimators against reference values for a short binary message
        """
        text_message = '11100001'
        list_message = [1, 1, 1, 0, 0, 0, 0, 1]

        shannon_value = get_shannon_entropy(text_message)
        plug_in_text = get_plug_in_entropy(text_message, word_length=1)
        plug_in_list = get_plug_in_entropy(list_message, word_length=1)
        lempel_ziv_value = get_lempel_ziv_entropy(text_message)
        konto_value = get_konto_entropy(text_message)

        # The string and the list form of the same message must give the same estimate
        self.assertEqual(plug_in_text, plug_in_list)

        # (computed value, reference value) pairs, all checked to within 1e-3
        reference_pairs = (
            (shannon_value, 1.0),
            (lempel_ziv_value, 0.625),
            (plug_in_text, 0.985),
            (konto_value, 0.9682),
        )
        for computed, reference in reference_pairs:
            self.assertAlmostEqual(computed, reference, delta=1e-3)

        # Konto entropy boundary conditions
        konto_with_2 = get_konto_entropy(text_message, 2)
        # Result is discarded: the call only has to complete without raising
        _match_length('1101111', 2, 3)
        self.assertAlmostEqual(konto_with_2, 0.8453, delta=1e-4)
        # A one-character message has zero entropy
        self.assertEqual(get_konto_entropy('a'), 0)