Ejemplo n.º 1
0
def get_weights_by_time_decay(triple_barrier_events, close_series, num_threads=5, decay=1):
    """
    Snippet 4.11, page 70, Implementation of Time Decay Factors

    Applies a piecewise-linear decay to cumulative observed uniqueness so that
    the newest observation receives weight 1 and older observations are scaled
    down according to ``decay``.

    :param triple_barrier_events: (data frame) of events from labeling.get_events()
    :param close_series: (pd.Series) close prices
    :param num_threads: (int) the number of threads concurrently used by the function.
    :param decay: (int) decay factor
        - decay = 1 means there is no time decay
        - 0 < decay < 1 means weights decay linearly over time, but every
          observation still receives a strictly positive weight, regardless of age
        - decay = 0 means weights converge linearly to zero as they become older
        - decay < 0 means the oldest portion c of the observations receives zero
          weight (i.e. they are erased from memory)
    :return: (pd.Series) of sample weights based on time decay factors
    """
    # Guard against NaNs in either the values or the index of the events frame
    has_nan_values = bool(triple_barrier_events.isnull().values.any())
    has_nan_index = bool(triple_barrier_events.index.isnull().any())
    assert has_nan_values is False and has_nan_index is False, 'NaN values in triple_barrier_events, delete nans'

    av_uniqueness = get_av_uniqueness_from_triple_barrier(triple_barrier_events, close_series, num_threads)
    cum_uniqueness = av_uniqueness['tW'].sort_index().cumsum()
    total_uniqueness = cum_uniqueness.iloc[-1]

    # Slope of the linear decay line; negative decay erases the oldest fraction
    slope = (1 - decay) / total_uniqueness if decay >= 0 else 1 / ((decay + 1) * total_uniqueness)
    intercept = 1 - slope * total_uniqueness

    weights = intercept + slope * cum_uniqueness
    weights[weights < 0] = 0  # Weights can't be negative
    return weights
Ejemplo n.º 2
0
    def test_get_av_uniqueness(self):
        """
        Assert that average event uniqueness is available for all labels and equals to particular values
        """

        av_un = get_av_uniqueness_from_triple_barrier(self.samples_info_sets, self.price_bars, num_threads=4)
        # Every label must have a uniqueness value
        self.assertTrue(av_un.shape[0] == self.samples_info_sets.shape[0])
        # Spot-check known uniqueness values at fixed positions
        for position, expected in ((0, 0.66), (2, 0.83), (5, 0.44), (-1, 1.0)):
            self.assertAlmostEqual(av_un['tW'].iloc[position], expected, delta=1e-2)
Ejemplo n.º 3
0
    def test_get_av_uniqueness(self):
        """
        Assert that average event uniqueness is available for all labels and equals to particular values
        """

        av_un = get_av_uniqueness_from_triple_barrier(self.meta_labeled_events, self.data['close'], num_threads=4)
        # Every label must have a uniqueness value
        self.assertTrue(av_un.shape[0] == self.meta_labeled_events.shape[0])
        # Spot-check exact uniqueness values at fixed positions
        for position, expected in ((0, 1), (4, 0.5), (6, 0.85)):
            self.assertTrue(av_un['tW'].iloc[position] == expected)
        # The last event has no uniqueness value
        self.assertTrue(bool(pd.isnull(av_un['tW'].iloc[-1])) is True)
Ejemplo n.º 4
0
def get_concurrent_stats(lbars_df: pd.DataFrame) -> dict:
    """
    Compute concurrency statistics for labeled bars.

    :param lbars_df: (pd.DataFrame) labeled bars with 'label_start_at',
        'label_outcome_at', 'open_at', 'close_at' and 'price_close' columns
    :return: (dict) with 'grand_avg_unq', the mean average label uniqueness
    """
    from mlfinlab.sampling.concurrent import get_av_uniqueness_from_triple_barrier

    # Events frame indexed by label start, single 't1' column = label outcome time
    samples_info_sets = lbars_df[['label_start_at', 'label_outcome_at']].set_index('label_start_at')
    samples_info_sets.columns = ['t1']  # t1 = label_outcome_at

    # Price bars indexed by bar close timestamp
    price_bars = lbars_df[['open_at', 'close_at', 'price_close']].set_index('close_at')

    label_avg_unq = get_av_uniqueness_from_triple_barrier(samples_info_sets, price_bars, num_threads=1)
    return {'grand_avg_unq': label_avg_unq['tW'].mean()}
Ejemplo n.º 5
0
from mlfinlab.sampling.concurrent import get_av_uniqueness_from_triple_barrier
import pandas as pd
import numpy as np

# Load the triple-barrier events; rows sharing a t1 timestamp are removed entirely
barrier_events = pd.read_csv('barrier_events.csv', parse_dates=[0])
barrier_events.drop_duplicates(subset="t1", keep=False, inplace=True)
barrier_events.set_index('t1', drop=False, inplace=True)

# Load close prices from csv
close_prices = pd.read_csv('stupid_data.csv', index_col=0, parse_dates=[0, 2])
print(close_prices)

# Measure average label uniqueness with mlfinlab's get_av_uniqueness_from_triple_barrier
av_unique = get_av_uniqueness_from_triple_barrier(barrier_events, close_prices.close, num_threads=3)
av_unique.mean()
print(av_unique.mean())

# Take the index of the first label whose uniqueness is exactly 1
unique_label_index = av_unique[av_unique.tW == 1].index[0]
print(unique_label_index)

# Slice events from the first unique label onward
barrier_events[barrier_events.index >= unique_label_index].head()  ### Figure out why this does not work

### Bagging, Bootstrapping and Random Forrest

# Ensemble learning technique (bagging with replacement) the goal is to randomly choose data samples