def get_weights_by_time_decay(triple_barrier_events, close_series, num_threads=5, decay=1):
    """
    Snippet 4.11, page 70, Implementation of Time Decay Factors

    Applies a piecewise-linear decay to the observed average uniqueness so that
    the newest observation gets weight = 1 and the oldest gets weight = decay.

    :param triple_barrier_events: (data frame) of events from labeling.get_events()
    :param close_series: (pd.Series) close prices
    :param num_threads: (int) the number of threads concurrently used by the function.
    :param decay: (int) decay factor
        - decay = 1 means there is no time decay
        - 0 < decay < 1 means that weights decay linearly over time, but every
          observation still receives a strictly positive weight, regardless of how old
        - decay = 0 means that weights converge linearly to zero, as they become older
        - decay < 0 means that the oldest portion c of the observations receive zero
          weight (i.e. they are erased from memory)
    :return: (pd.Series) of sample weights based on time decay factors
    """
    assert bool(triple_barrier_events.isnull().values.any()) is False and bool(
        triple_barrier_events.index.isnull().any()) is False, 'NaN values in triple_barrier_events, delete nans'

    # Cumulative observed uniqueness, ordered chronologically; the decay is
    # linear in this cumulative uniqueness, not in calendar time.
    av_uniqueness = get_av_uniqueness_from_triple_barrier(triple_barrier_events, close_series, num_threads)
    cum_uniqueness = av_uniqueness['tW'].sort_index().cumsum()
    total_uniqueness = cum_uniqueness.iloc[-1]

    # Slope of the piecewise-linear weight function (negative decay erases
    # the oldest fraction of observations entirely).
    if decay >= 0:
        slope = (1 - decay) / total_uniqueness
    else:
        slope = 1 / ((decay + 1) * total_uniqueness)
    intercept = 1 - slope * total_uniqueness

    weights = intercept + slope * cum_uniqueness
    weights[weights < 0] = 0  # Weights can't be negative
    return weights
def test_get_av_uniqueness(self):
    """
    Assert that average event uniqueness is available for all labels and
    equals to particular values
    """
    uniqueness = get_av_uniqueness_from_triple_barrier(self.samples_info_sets, self.price_bars,
                                                       num_threads=4)
    # Every label must have a uniqueness value
    self.assertEqual(uniqueness.shape[0], self.samples_info_sets.shape[0])
    # Spot-check the 'tW' weights at a few positions (2-decimal tolerance)
    expected_weights = {0: 0.66, 2: 0.83, 5: 0.44, -1: 1.0}
    for position, expected in expected_weights.items():
        self.assertAlmostEqual(uniqueness['tW'].iloc[position], expected, delta=1e-2)
def test_get_av_uniqueness(self):
    """
    Assert that average event uniqueness is available for all labels and
    equals to particular values
    """
    uniqueness = get_av_uniqueness_from_triple_barrier(self.meta_labeled_events, self.data['close'],
                                                       num_threads=4)
    # Every label must have a uniqueness value
    self.assertEqual(uniqueness.shape[0], self.meta_labeled_events.shape[0])
    # Exact expected weights for selected labels
    self.assertEqual(uniqueness['tW'].iloc[0], 1)
    self.assertEqual(uniqueness['tW'].iloc[4], 0.5)
    self.assertEqual(uniqueness['tW'].iloc[6], 0.85)
    # The final event's uniqueness is NaN in this fixture
    self.assertTrue(pd.isnull(uniqueness['tW'].iloc[-1]))
def get_concurrent_stats(lbars_df: pd.DataFrame) -> dict:
    """
    Compute concurrency statistics for a set of labeled bars.

    Builds the events frame and price-bars frame expected by mlfinlab's
    ``get_av_uniqueness_from_triple_barrier`` and returns the grand average
    of the per-label uniqueness weights.

    :param lbars_df: (pd.DataFrame) labeled bars with at least the columns
        'label_start_at', 'label_outcome_at', 'open_at', 'close_at', 'price_close'
    :return: (dict) with key 'grand_avg_unq' — the mean of the per-label
        average uniqueness ('tW') series
    """
    # Local import keeps mlfinlab optional for callers that never use this helper
    from mlfinlab.sampling.concurrent import get_av_uniqueness_from_triple_barrier

    # Events indexed by label start time; mlfinlab expects the outcome time in column 't1'
    samples_info_sets = lbars_df[['label_start_at', 'label_outcome_at']]
    samples_info_sets = samples_info_sets.set_index('label_start_at')
    samples_info_sets.columns = ['t1']  # t1 = label_outcome_at

    # Price bars indexed by bar close time
    price_bars = lbars_df[['open_at', 'close_at', 'price_close']].set_index('close_at')

    label_avg_unq = get_av_uniqueness_from_triple_barrier(samples_info_sets, price_bars, num_threads=1)
    return {'grand_avg_unq': label_avg_unq['tW'].mean()}
from mlfinlab.sampling.concurrent import get_av_uniqueness_from_triple_barrier
import pandas as pd
import numpy as np

# Get barrier events (you might have to drop duplicate timestamps...)
barrier_events = pd.read_csv('barrier_events.csv', parse_dates=[0])
barrier_events.drop_duplicates(subset="t1", keep=False, inplace=True)
barrier_events.set_index('t1', drop=False, inplace=True)

# Get our close prices from csv
close_prices = pd.read_csv('stupid_data.csv', index_col=0, parse_dates=[0, 2])
print(close_prices)

# Measure average label uniqueness using mlfinlab's
# get_av_uniqueness_from_triple_barrier; the mean was previously computed
# twice with the first result discarded — compute it once.
av_unique = get_av_uniqueness_from_triple_barrier(barrier_events, close_prices.close, num_threads=3)
print(av_unique.mean())

# Index of the first fully unique label (tW == 1); take the first such sample
unique_label_index = av_unique[av_unique.tW == 1].index[0]
print(unique_label_index)

# TODO: figure out why this does not work — note .head() returns a frame that
# is discarded here; print or assign it to inspect the result.
barrier_events[barrier_events.index >= unique_label_index].head()

# Bagging, Bootstrapping and Random Forest
# Ensemble learning technique (bagging with replacement): the goal is to randomly choose data samples