Example #1
    def test_error_raise(self):
        """
        Test that a ValueError is raised if the threshold is neither float/int nor pd.Series.
        """
        with self.assertRaises(ValueError):
            cusum_filter(self.data['close'], threshold='str', time_stamps=True)
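
The test above relies on cusum_filter validating its threshold argument. As a reference, a minimal sketch of such a guard (an assumption about how the validation might look, not the library's exact code):

import pandas as pd

def validate_threshold(threshold):
    """Minimal sketch of the guard the test relies on: reject thresholds that
    are neither numeric nor a pd.Series of per-bar thresholds."""
    if not isinstance(threshold, (float, int, pd.Series)):
        raise ValueError('threshold must be a float, an int, or a pd.Series')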
Example #2
    def test_drop_labels(self):
        """
        Assert that drop_labels removes rare class labels.
        """
        daily_vol = get_daily_vol(close=self.data['close'], lookback=100)
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_days=1)
        triple_barrier_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 1],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None)
        triple_labels = get_bins(triple_barrier_events, self.data['close'])

        # Drop the 2 zero labels in the set since they are "rare"
        new_labels = drop_labels(events=triple_labels, min_pct=0.30)
        self.assertTrue(0 not in set(new_labels['bin']))

        # Assert that threshold works
        new_labels = drop_labels(events=triple_labels, min_pct=0.20)
        self.assertTrue(0 in set(new_labels['bin']))
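
For context, the behaviour exercised above is dropping classes whose share of observations falls below min_pct. A minimal sketch of that idea (an assumption about the approach, not the library's exact drop_labels implementation; it expects an events frame with a 'bin' column as in the test):

import pandas as pd

def drop_rare_labels(events: pd.DataFrame, min_pct: float = 0.05) -> pd.DataFrame:
    """Minimal sketch: iteratively drop the rarest 'bin' class while its share
    is below min_pct and more than two classes remain."""
    while True:
        counts = events['bin'].value_counts(normalize=True)
        if counts.min() > min_pct or counts.shape[0] < 3:
            break
        events = events[events['bin'] != counts.idxmin()]
    return events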
Example #3
    def test_cusum_filter(self):
        """
        Assert that the CUSUM filter works as expected.
        Checks that all the events generated by different threshold values meet the requirements of the filter.
        """

        # Check all events for various threshold levels
        for threshold in [0.005, 0.007, 0.01, 0.015, 0.02, 0.03, 0.04]:
            for timestamp in [True, False]:

                cusum_events = cusum_filter(self.data['close'],
                                            threshold=threshold,
                                            time_stamps=timestamp)

                for i in range(1, len(cusum_events)):
                    event_1 = self.data.index.get_loc(cusum_events[i - 1])
                    event_2 = self.data.index.get_loc(cusum_events[i])

                    date_range = self.data.iloc[event_1:event_2 + 1]['close']
                    last = np.log(date_range.iloc[-1])
                    minimum = np.log(date_range.min())
                    maximum = np.log(date_range.max())

                    # Calculate CUSUM
                    spos = last - minimum
                    sneg = last - maximum
                    cusum = max(np.abs(sneg), spos)

                    self.assertTrue(cusum >= threshold)
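
The assertions above reconstruct the filter condition between consecutive events. As a reference, a minimal sketch of a symmetric CUSUM filter on log prices (assuming a fixed scalar threshold and a datetime-indexed close series; the actual cusum_filter also accepts a pd.Series threshold and a time_stamps flag):

import numpy as np
import pandas as pd

def symmetric_cusum_events(close: pd.Series, threshold: float) -> pd.DatetimeIndex:
    """Minimal sketch: sample an event whenever the cumulative positive or
    negative log-return drift since the last event exceeds the threshold."""
    events = []
    s_pos, s_neg = 0.0, 0.0
    log_ret = np.log(close).diff().dropna()
    for timestamp, ret in log_ret.items():
        s_pos = max(0.0, s_pos + ret)  # reset when the upward drift is exhausted
        s_neg = min(0.0, s_neg + ret)  # reset when the downward drift is exhausted
        if s_neg < -threshold:
            s_neg = 0.0
            events.append(timestamp)
        elif s_pos > threshold:
            s_pos = 0.0
            events.append(timestamp)
    return pd.DatetimeIndex(events)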
Example #4
    def setUp(self):
        """
        Set the file path for the sample dollar bars data and get triple barrier events
        """
        project_path = os.path.dirname(__file__)
        self.path = project_path + '/test_data/dollar_bar_sample.csv'
        self.data = pd.read_csv(self.path, index_col='date_time')
        self.data.index = pd.to_datetime(self.data.index)

        daily_vol = get_daily_vol(close=self.data['close'], lookback=100)
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_days=2)

        self.data['side'] = 1
        self.meta_labeled_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[4, 4],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=self.data['side'],
            verbose=False)
Example #5
    def test_dynamic_cusum_filter(self):
        """
        Test the CUSUM filter with a dynamic (pd.Series) threshold and assert the length of the filtered series.
        """
        dynamic_threshold = self.data['close'] * 1e-5
        cusum_events = cusum_filter(self.data['close'],
                                    threshold=dynamic_threshold,
                                    time_stamps=True)
        self.assertTrue(cusum_events.shape[0] == 9)
Example #6
    def test_triple_barrier_labeling(self):
        """
        Assert that meta labeling as well as standard labeling works. Also check that if a vertical barrier is
        reached, then a 0 class label is assigned (in the case of standard labeling).
        """
        daily_vol = get_daily_vol(close=self.data['close'], lookback=100)
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_days=1)

        # ----------------------
        # Assert 0 labels are generated if vertical barrier hit
        triple_barrier_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 1],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None,
            verbose=False)

        triple_labels = get_bins(triple_barrier_events, self.data['close'])
        self.assertTrue(
            np.all(triple_labels[np.abs(triple_labels['ret']) <
                                 triple_labels['trgt']]['bin'] == 0))

        # Assert meta labeling works
        self.data['side'] = 1
        triple_barrier_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 1],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=self.data['side'],
            verbose=False)

        triple_labels = get_bins(triple_barrier_events, self.data['close'])

        # Label 1 if made money, else 0
        condition1 = triple_labels['ret'] > 0
        condition2 = triple_labels['ret'].abs() > triple_labels['trgt']
        self.assertTrue(
            ((condition1 & condition2) == triple_labels['bin']).all())

        # Assert shape
        self.assertTrue(triple_labels.shape == (8, 4))
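
The two conditions above spell out the meta-labeling rule the test expects from get_bins when a side prediction is supplied. A minimal sketch of that rule (using the 'ret' and 'trgt' columns as in the test; not the library's exact implementation):

import pandas as pd

def meta_label_bins_sketch(labels: pd.DataFrame) -> pd.Series:
    """Minimal sketch of the rule checked above: label 1 only when the trade
    made money and the absolute return exceeded the target, else 0."""
    made_money = labels['ret'] > 0
    beat_target = labels['ret'].abs() > labels['trgt']
    return (made_money & beat_target).astype(int)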
Example #7
    def test_vertical_barriers(self):
        """
        Assert that the vertical barrier returns the timestamp x days after the event.
        """
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)

        # Compute vertical barrier
        for days in [1, 2, 3, 4, 5]:
            vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                     close=self.data['close'],
                                                     num_days=days)

            # For each row assert the time delta is correct
            for start_date, end_date in vertical_barriers.items():
                self.assertTrue((end_date - start_date).days >= 1)
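
As a reference for what this test exercises, a minimal sketch of a vertical-barrier helper (assuming a days-only offset; per Example #10 the library's add_vertical_barrier also accepts intraday offsets such as num_hours):

import pandas as pd

def vertical_barrier_sketch(t_events: pd.DatetimeIndex, close: pd.Series,
                            num_days: int = 1) -> pd.Series:
    """Minimal sketch: for each event, return the timestamp of the first bar at
    or after event + num_days; events running past the end of the data are dropped."""
    barrier_idx = close.index.searchsorted(t_events + pd.Timedelta(days=num_days))
    barrier_idx = barrier_idx[barrier_idx < close.shape[0]]
    return pd.Series(close.index[barrier_idx], index=t_events[:barrier_idx.shape[0]])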
Example #8
    def test_triple_barrier_events(self):
        """
        Assert that the different versions of triple barrier labeling match our expected output.
        Assert that trgts are the same for all 3 methods.
        """
        daily_vol = get_daily_vol(close=self.data['close'], lookback=100)
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_days=1)

        # No meta-labeling
        triple_barrier_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 1],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None)

        # Test that the events are the same as expected (naive test)
        self.assertTrue(triple_barrier_events.shape == (8, 4))  # Assert shape

        # Assert that targets match expectations
        self.assertTrue(triple_barrier_events.iloc[0,
                                                   1] == 0.010166261175903357)
        self.assertTrue(triple_barrier_events.iloc[-1,
                                                   1] == 0.006455887663302871)

        # Assert start of triple barrier event aligns with cusum_filter
        self.assertTrue(
            np.all(triple_barrier_events.index == cusum_events[1:]))

        # ----------------------
        # With meta-labeling
        self.data['side'] = 1
        meta_labeled_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 1],
            target=daily_vol,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=self.data['side'])

        # Assert that the two event sets are the same since they are generated using the same data
        self.assertTrue(
            np.all(meta_labeled_events['t1'] == triple_barrier_events['t1']))
        self.assertTrue(
            np.all(
                meta_labeled_events['trgt'] == triple_barrier_events['trgt']))

        # Assert shape
        self.assertTrue(meta_labeled_events.shape == (8, 5))

        # ----------------------
        # No vertical barriers
        no_vertical_events = get_events(close=self.data['close'],
                                        t_events=cusum_events,
                                        pt_sl=[1, 1],
                                        target=daily_vol,
                                        min_ret=0.005,
                                        num_threads=3,
                                        vertical_barrier_times=False,
                                        side_prediction=None)

        # Assert targets match other events trgts
        self.assertTrue(
            np.all(
                triple_barrier_events['trgt'] == no_vertical_events['trgt']))
        self.assertTrue(no_vertical_events.shape == (8, 4))

        # Previously the vertical barrier was touched twice, assert that those events aren't included here
        self.assertTrue(
            (no_vertical_events['t1'] != triple_barrier_events['t1']
             ).sum() == 2)
Example #9
    def test_pt_sl_levels_triple_barrier_events(self):
        """
        Previously a bug was introduced by not multiplying the target by the profit-taking / stop-loss multiples,
        so get_bins would not return the correct labels. For example, with a take-profit multiple of 1000 (so high
        that the level could never be hit before the vertical barrier), the multiple was ignored and only the raw
        target value was used, signalling that the price had reached the profit-taking barrier. Vertical-barrier
        events were therefore labeled incorrectly.

        It also meant that, irrespective of the pt_sl levels set, the labels were always the same.
        """

        target = get_daily_vol(close=self.data['close'], lookback=100)
        cusum_events = cusum_filter(self.data['close'], threshold=0.02)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_days=1)

        # --------------------------------------------------------------------------------------------------------
        # Assert that the vertical barrier would be reached for all positions due to the high pt level.
        # All labels should be 0. Check the 'bin' column
        pt_sl = [1000, 1000]
        triple_barrier_events_ptsl = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=pt_sl,
            target=target,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None)

        triple_labels_ptsl_large = get_bins(triple_barrier_events_ptsl,
                                            self.data['close'])
        labels_large = triple_labels_ptsl_large['bin']
        label_count = triple_labels_ptsl_large['bin'].sum()
        self.assertTrue(label_count == 0)

        # --------------------------------------------------------------------------------------------------------
        # Assert that the vertical barriers are never reached for very small multiples
        triple_barrier_events_ptsl = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[0.00000001, 0.00000001],
            target=target,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None)

        triple_labels_ptsl_small = get_bins(triple_barrier_events_ptsl,
                                            self.data['close'])
        labels_small = triple_labels_ptsl_small['bin']
        label_count = (triple_labels_ptsl_small['bin'] == 0).sum()
        self.assertTrue(label_count == 0)

        # --------------------------------------------------------------------------------------------------------
        # TP too large and tight stop loss: expected all values less than 1
        triple_barrier_events_ptsl = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[10000, 0.00000001],
            target=target,
            min_ret=0.005,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=None)

        labels_no_ones = get_bins(triple_barrier_events_ptsl,
                                  self.data['close'])['bin']
        self.assertTrue(np.all(labels_no_ones < 1))

        # --------------------------------------------------------------------------------------------------------
        # Test that the bins are in fact different (previously they would be the same)
        self.assertTrue(np.all(labels_small[0:5] != labels_large[0:5]))
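
The docstring above describes the regression being guarded against: the horizontal barriers must be scaled by the pt_sl multiples rather than using the raw target. A minimal sketch of the intended scaling (assuming an events frame with a 'trgt' column, as used in this test; not the library's exact get_events code):

import pandas as pd

def horizontal_barriers_sketch(events: pd.DataFrame, pt_sl: list) -> pd.DataFrame:
    """Minimal sketch: profit-taking and stop-loss levels are the unit target
    ('trgt') multiplied by the pt_sl multiples, not the raw target itself."""
    barriers = pd.DataFrame(index=events.index)
    # A non-positive multiple disables the corresponding barrier (left as NaN).
    barriers['pt'] = pt_sl[0] * events['trgt'] if pt_sl[0] > 0 else pd.Series(dtype=float, index=events.index)
    barriers['sl'] = -pt_sl[1] * events['trgt'] if pt_sl[1] > 0 else pd.Series(dtype=float, index=events.index)
    return barriers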
Example #10
    def setUp(self):
        """
        Set the file path for the sample dollar bars data and get triple barrier events, generate features
        """
        project_path = os.path.dirname(__file__)
        self.path = project_path + '/test_data/dollar_bar_sample.csv'
        self.data = pd.read_csv(self.path, index_col='date_time')
        self.data.index = pd.to_datetime(self.data.index)

        # Compute moving averages
        self.data['fast_mavg'] = self.data['close'].rolling(
            window=20, min_periods=20, center=False).mean()
        self.data['slow_mavg'] = self.data['close'].rolling(
            window=50, min_periods=50, center=False).mean()

        # Compute sides
        self.data['side'] = np.nan

        long_signals = self.data['fast_mavg'] >= self.data['slow_mavg']
        short_signals = self.data['fast_mavg'] < self.data['slow_mavg']
        self.data.loc[long_signals, 'side'] = 1
        self.data.loc[short_signals, 'side'] = -1

        # Remove look-ahead bias by lagging the signal
        self.data['side'] = self.data['side'].shift(1)

        daily_vol = get_daily_vol(close=self.data['close'], lookback=50) * 0.5
        cusum_events = cusum_filter(self.data['close'], threshold=0.005)
        vertical_barriers = add_vertical_barrier(t_events=cusum_events,
                                                 close=self.data['close'],
                                                 num_hours=2)
        meta_labeled_events = get_events(
            close=self.data['close'],
            t_events=cusum_events,
            pt_sl=[1, 4],
            target=daily_vol,
            min_ret=5e-5,
            num_threads=3,
            vertical_barrier_times=vertical_barriers,
            side_prediction=self.data['side'])
        meta_labeled_events.dropna(inplace=True)
        labels = get_bins(meta_labeled_events, self.data['close'])

        # Generate data set which shows the power of SB Bagging vs Standard Bagging
        ind_mat = get_ind_matrix(meta_labeled_events.t1, self.data.close)

        unique_samples = _get_synthetic_samples(ind_mat, 0.5, 0.1)

        # Get the synthetic data set with the drawn samples
        X = self.data.loc[labels.index, :].iloc[unique_samples].dropna()
        labels = labels.loc[X.index, :]
        X.loc[labels.index, 'y'] = labels.bin

        # Generate features (some of them are informative, others are just noise)
        for index, value in X.y.items():
            X.loc[index,
                  'label_prob_0.6'] = _generate_label_with_prob(value, 0.6)
            X.loc[index,
                  'label_prob_0.5'] = _generate_label_with_prob(value, 0.5)
            X.loc[index,
                  'label_prob_0.3'] = _generate_label_with_prob(value, 0.3)
            X.loc[index,
                  'label_prob_0.2'] = _generate_label_with_prob(value, 0.2)
            X.loc[index,
                  'label_prob_0.1'] = _generate_label_with_prob(value, 0.1)

        features = ['label_prob_0.6', 'label_prob_0.2', 'label_prob_0.1']  # Initial feature set
        for prob in [0.5, 0.3, 0.2, 0.1]:
            for window in [2, 5]:
                X['label_prob_{}_sma_{}'.format(
                    prob, window)] = X['label_prob_{}'.format(prob)].rolling(
                        window=window).mean()
                features.append('label_prob_{}_sma_{}'.format(prob, window))
        X.dropna(inplace=True)
        y = X.pop('y')

        self.X_train, self.X_test, self.y_train_clf, self.y_test_clf = train_test_split(
            X[features], y, test_size=0.4, random_state=1, shuffle=False)
        self.y_train_reg = (1 + self.y_train_clf)
        self.y_test_reg = (1 + self.y_test_clf)

        self.samples_info_sets = meta_labeled_events.loc[self.X_train.index,
                                                         't1']
        self.price_bars_trim = self.data[
            (self.data.index >= self.X_train.index.min())
            & (self.data.index <= self.X_train.index.max())].close