def test_trend_scanning_labels(self):
    """
    Check trend scanning labels produced with a 20-observation window on the
    EEM close series: output size, label counts, NaN rows, the t1 ordering,
    a handful of spot values, and equivalence with the t_events=None call.
    """
    close = self.eem_close
    events = close.index
    labels = trend_scanning_labels(close, events, 20)

    # One output row is expected per event timestamp
    self.assertEqual(labels.shape[0], len(events))

    # Before 2008/5/12 we had a strong positive trend
    calendar_days = pd.date_range(pd.Timestamp(2008, 1, 1), pd.Timestamp(2008, 5, 9))
    early_bins = labels.reindex(calendar_days).dropna().bin
    self.assertTrue(set(early_bins) == {1})

    # Label distribution check: -1 labels first, then +1 labels
    bin_counts = labels.bin.value_counts()
    self.assertEqual(bin_counts[-1], 70)
    self.assertEqual(bin_counts[1], 40)

    # Rows with any NaN (the original author attributes them to the missing
    # look-forward data at the end of the series) must number exactly 19
    nan_rows = labels.shape[0] - labels.dropna().shape[0]
    self.assertEqual(nan_rows, 19)
    labels.dropna(inplace=True)

    # Every remaining event must precede its t1
    self.assertTrue((labels.t1 > labels.index).all())

    # Spot checks as (row position, expected ret, expected bin)
    spot_checks = [
        (0, 0.05037, 1),
        (2, 0.0350, 1),
        (10, 0.07508, 1),
        (20, 0.05219, 1),
        (50, 0.02447, 1),
    ]
    for position, expected_ret, expected_bin in spot_checks:
        row = labels.iloc[position]
        self.assertAlmostEqual(row['ret'], expected_ret, delta=1e-4)
        self.assertEqual(row['bin'], expected_bin)

    # Passing t_events=None must give the same labels as passing the full index
    labels_default_events = trend_scanning_labels(close, t_events=None, look_forward_window=20)
    labels_default_events.dropna(inplace=True)
    self.assertTrue((labels == labels_default_events).all().all())
def test_trend_scanning_labels_backward(self):
    """
    Check trend scanning labels produced with a 20-observation window and the
    fourth positional argument set to False (backward-looking mode) on the
    EEM close series.
    """
    close = self.eem_close
    events = close.index
    labels = trend_scanning_labels(close, events, 20, False)

    # One output row is expected per event timestamp
    self.assertEqual(labels.shape[0], len(events))

    # Looking backwards, before 2008/5/22 was a strong positive trend
    window_start = pd.Timestamp(2008, 4, 29)
    window_end = pd.Timestamp(2008, 5, 22)
    window_bins = labels.loc[window_start:window_end].bin
    self.assertTrue(set(window_bins) == {1})

    # Label distribution check: -1 labels first, then +1 labels
    bin_counts = labels.bin.value_counts()
    self.assertEqual(bin_counts[-1], 86)
    self.assertEqual(bin_counts[1], 24)

    # Rows with any NaN (the original author attributes them to the missing
    # look-backward data at the start of the series) must number exactly 19
    nan_rows = labels.shape[0] - labels.dropna().shape[0]
    self.assertEqual(nan_rows, 19)
    labels.dropna(inplace=True)

    # Every remaining event must come after its t1
    self.assertTrue((labels.t1 < labels.index).all())

    # Spot checks as (row position, expected ret, expected bin)
    spot_checks = [
        (0, -0.0025, 1),
        (2, -0.0201, 1),
        (10, -0.0126, 1),
        (20, 0.0185, -1),
        (50, 0.0147, -1),
    ]
    for position, expected_ret, expected_bin in spot_checks:
        row = labels.iloc[position]
        self.assertAlmostEqual(row['ret'], expected_ret, delta=1e-4)
        self.assertEqual(row['bin'], expected_bin)

    # Passing t_events=None must give the same labels as passing the full index
    labels_default_events = trend_scanning_labels(close, t_events=None,
                                                  observation_window=20, look_forward=False)
    labels_default_events.dropna(inplace=True)
    self.assertTrue((labels == labels_default_events).all().all())
max_abs_t_val = -np.inf max_t_val = None max_t_index = None for fwd in range(min_obs, len(lookForward)): y = lookForward.iloc[:fwd] tVal = tValOLS(y) if abs(tVal) > max_abs_t_val: max_abs_t_val = abs(tVal) max_t_val = tVal max_t_index = y.index[-1] ser.loc[curr_t, "t_val"] = max_t_val ser.loc[curr_t, "st"] = max_t_index # print(f"curr_t: {curr_t}, max_abs_t_val: {max_abs_t_val}, max_t_index: {max_t_index}") # print(ser[name].loc[max_t_index]) # print(ser[name].loc[curr_t]) ser.loc[curr_t, "ret"] = ser[name].loc[max_t_index] / ser[name].loc[curr_t] - 1 return ser df = pd.DataFrame({"close": 100 * np.sin(range(0, 90))}) print(df["close"]) trend_labelling(df["close"], span=20).to_csv("own.csv") trend_scanning_labels(price_series=df["close"], t_events=df.index, look_forward_window=20, min_sample_length=5).to_csv("mfinlab.csv")