def test_transform_stochastic_k_x():
    """Compare TransformStochasticK.transform_x with a hand-computed value.

    The reference is where the last raw row sits inside each column's
    [min, max] range, scaled to a percentage and shaped as a one-row frame.
    """
    config = {'name': 'stochastic_k'}
    feature = FinancialFeature(
        name='high',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=10,
    )
    transform = TransformStochasticK(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    actual = transform.transform_x(feature, raw_dataframe)

    column_min = raw_dataframe.min()
    column_max = raw_dataframe.max()
    percent_k = ((raw_dataframe.iloc[-1] - column_min) / (column_max - column_min)) * 100.
    # Turn the per-column Series into a single-row frame with the raw columns.
    expected = pd.DataFrame(
        np.expand_dims(percent_k, axis=0),
        columns=raw_dataframe.columns,
    )

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
def test_transform_log_return_x():
    """Spot-check TransformLogReturn.transform_x on the 'AAPL' column.

    One randomly chosen row must equal log(value / previous value), and the
    first transformed row must be NaN for this non-local feature.
    """
    config = {'name': 'log-return'}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=35,
    )
    transform = TransformLogReturn(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    all_log_returns = transform.transform_x(feature, raw_dataframe)

    symbol = 'AAPL'
    prices = raw_dataframe[symbol]
    log_returns = all_log_returns[symbol]

    # Row 0 is excluded from the draw: it has no predecessor to divide by.
    row = random.randrange(1, len(prices) - 1)
    expected_value = np.log(prices.iloc[row] / prices.iloc[row - 1])

    assert_almost_equal(log_returns.iloc[row], expected_value, ASSERT_NDECIMALS)
    assert np.isnan(log_returns.iloc[0])
def test_transform_ker_x():
    """Compare TransformKer.transform_x with a ratio rebuilt from the raw data.

    The reference divides the absolute change over ``transform.lag`` rows by
    the rolling sum of absolute one-row changes over the same window, with
    NaN rows dropped at each step.
    """
    config = {'name': 'ker', 'lag': 20}
    feature = FinancialFeature(
        name='high',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=10,
    )
    raw_dataframe = financial_data_fixtures[feature.name]
    transform = TransformKer(config)

    actual = transform.transform_x(feature, raw_dataframe)

    net_change = raw_dataframe.diff(transform.lag).abs().dropna(axis=0)
    summed_steps = (
        raw_dataframe.diff().abs().rolling(window=transform.lag).sum().dropna(axis=0)
    )
    expected = (net_change / summed_steps).dropna(axis=0)

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
def test_transform_log_return_y():
    """Check TransformLogReturn.transform_y against log(target / reference).

    The target is the last raw row; the reference is the last row of the
    data that precedes it.
    """
    config = {'name': 'log-return'}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=35,
    )
    transform = TransformLogReturn(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    # Everything but the final row plays the role of the x data.
    reference_row = raw_dataframe.iloc[:-1].iloc[-1]
    target_row = raw_dataframe.iloc[-1]

    actual = transform.transform_y(feature, target_row, reference_row)
    expected = np.log(target_row / reference_row)

    assert_almost_equal(actual, expected.values, ASSERT_NDECIMALS)
def test_transform_volatility_x():
    """Check the shape of TransformVolatility.transform_x on the 'close' fixture."""
    config = {'name': 'volatility', 'window': 10}
    feature = FinancialFeature(
        name='close',
        # The feature receives its own dict, equal to but distinct from `config`.
        transformation=dict(config),
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )
    transform = TransformVolatility(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    result = transform.transform_x(feature, raw_dataframe)

    expected_shape = (256, 5)
    assert result.shape == expected_shape
def test_transform_ewma_x():
    """Compare TransformEWMA.transform_x with pandas' own ewm(halflife).mean()."""
    config = {'name': 'ewma', 'halflife': 20}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )
    transform = TransformEWMA(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    actual = transform.transform_x(feature, raw_dataframe)

    # Reference uses the halflife the transform itself exposes.
    smoother = raw_dataframe.ewm(halflife=transform.halflife)
    expected = smoother.mean()

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
def test_transform_mtf_x():
    """Check the output shape of TransformMTF.transform_x.

    Expected shape: ``image_size ** 2`` rows by one column per raw column.
    """
    config = {'name': 'mtf', 'image_size': 24}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )
    transform = TransformMTF(config)
    raw_dataframe = financial_data_fixtures[feature.name]

    result = transform.transform_x(feature, raw_dataframe)

    n_rows = transform.image_size ** 2
    n_columns = raw_dataframe.shape[1]
    assert result.shape == (n_rows, n_columns)
def test_tranform_gasf_x():
    """Run the shared ``_perform_test`` check against a TransformGASF instance.

    The function name keeps its existing spelling ("tranform") so that the
    test id stays the same.
    """
    config = {'name': 'gasf', 'image_size': 24}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )
    transform = TransformGASF(config)
    fixture_frame = financial_data_fixtures[feature.name]

    _perform_test(feature, fixture_frame, transform)
def test_transform_x_log_return_with_local_feature():
    """With ``local=True`` the first log-return row for 'AAPL' must not be NaN."""
    config = {'name': 'log-return'}
    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=35,
    )
    raw_dataframe = financial_data_fixtures[feature.name]
    transform = TransformLogReturn(config)

    all_log_returns = transform.transform_x(feature, raw_dataframe)
    aapl_log_returns = all_log_returns['AAPL']

    first_value = aapl_log_returns.iloc[0]
    assert not np.isnan(first_value)
def setUp(self):
    """Create three target features and collect them, in order, in ``feature_list``."""
    # Keyword arguments every feature below has in common.
    shared = dict(
        resample_minutes=0,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        classify_per_series=True,
    )

    # NOTE: despite "close" in the attribute name, this feature uses name='open'.
    self.feature_close_with_value_transform = FinancialFeature(
        name='open',
        transformation={'name': 'value'},
        normalization=None,
        nbins=5,
        ndays=2,
        start_market_minute=30,
        length=15,
        **shared
    )
    self.feature_close_with_log_return_transform = FinancialFeature(
        name='close',
        transformation={'name': 'log-return'},
        normalization=None,
        nbins=10,
        ndays=5,
        start_market_minute=90,
        length=35,
        **shared
    )
    # The only feature here with normalization set and nbins=None.
    self.feature_high_with_log_return_transform = FinancialFeature(
        name='high',
        transformation={'name': 'log-return'},
        normalization='standard',
        nbins=None,
        ndays=10,
        start_market_minute=150,
        length=69,
        **shared
    )

    self.feature_list = [
        self.feature_close_with_value_transform,
        self.feature_close_with_log_return_transform,
        self.feature_high_with_log_return_transform,
    ]
'resample_minutes': 60, 'start_market_minute': 1, 'is_target': True, KEY_EXCHANGE: 'NYSE', 'local': True, 'length': 10 }, ] sample_fin_feature_list = FeatureList([ FinancialFeature(name='open', transformation={'name': 'value'}, normalization=None, nbins=5, ndays=2, resample_minutes=60, start_market_minute=1, is_target=False, calendar=sample_market_calendar, classify_per_series=False, normalise_per_series=False, local=True, length=10), FinancialFeature(name='close', transformation={'name': 'log-return'}, normalization=None, nbins=10, ndays=5, resample_minutes=60, start_market_minute=1, is_target=False, calendar=sample_market_calendar,
class TestFinancialFeature(TestCase):
    """Tests for FinancialFeature: start timestamps, data selection and declassification."""

    def setUp(self):
        """Create three target features and collect them, in order, in ``feature_list``."""
        # NOTE: despite "close" in the attribute name, this feature uses name='open'.
        self.feature_close_with_value_transform = FinancialFeature(
            name='open',
            transformation={'name': 'value'},
            normalization=None,
            nbins=5,
            ndays=2,
            resample_minutes=0,
            start_market_minute=30,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=15,
            classify_per_series=True)
        self.feature_close_with_log_return_transform = FinancialFeature(
            name='close',
            transformation={'name': 'log-return'},
            normalization=None,
            nbins=10,
            ndays=5,
            resample_minutes=0,
            start_market_minute=90,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=35,
            classify_per_series=True)
        # The only feature here with nbins=None; test_declassify_single_predict_y
        # branches on that.
        self.feature_high_with_log_return_transform = FinancialFeature(
            name='high',
            transformation={'name': 'log-return'},
            normalization='standard',
            nbins=None,
            ndays=10,
            resample_minutes=0,
            start_market_minute=150,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=69,
            classify_per_series=True)
        self.feature_list = [
            self.feature_close_with_value_transform,
            self.feature_close_with_log_return_transform,
            self.feature_high_with_log_return_transform,
        ]

    def test_get_start_timestamp_x(self):
        """Each feature's _get_start_timestamp_x must match a fixed UTC timestamp."""
        start_date_str = '20150101'
        end_date_str = '20150501'
        market_open_list = sample_market_calendar.schedule(
            start_date_str, end_date_str).market_open
        # Select by position explicitly: an integer key on a non-integer index
        # relies on a positional fallback that pandas has deprecated.
        prediction_timestamp = market_open_list.iloc[20] + timedelta(minutes=15)

        expectations = (
            (self.feature_close_with_value_transform, '2015-01-29 15:00:00+0000'),
            (self.feature_close_with_log_return_transform, '2015-01-26 16:00:00+0000'),
            (self.feature_high_with_log_return_transform, '2015-01-16 17:00:00+0000'),
        )
        for feature, expected_str in expectations:
            start_timestamp_x = feature._get_start_timestamp_x(prediction_timestamp)
            expected_start_timestamp_x = pd.Timestamp(expected_str, tz='UTC')
            assert start_timestamp_x == expected_start_timestamp_x

    def test_select_prediction_data(self):
        """_select_prediction_data_x must return the last ``length`` rows up to the timestamp."""
        feature = self.feature_close_with_value_transform
        data_frame = financial_data_fixtures[feature.name]
        start_date = data_frame.index[0].date()
        end_date = data_frame.index[-1].date()
        market_open_list = sample_market_calendar.schedule(
            str(start_date), str(end_date)).market_open
        prediction_timestamp = market_open_list.iloc[20] + timedelta(minutes=15)

        selected_prediction_data = feature._select_prediction_data_x(
            data_frame, prediction_timestamp)

        # One past the last row whose index is <= the prediction timestamp.
        last_index = np.argwhere(
            data_frame.index <= prediction_timestamp)[-1][0] + 1
        first_index = last_index - feature.length
        expected_data_frame = data_frame.iloc[first_index:last_index]

        assert selected_prediction_data.equals(expected_data_frame)

    @staticmethod
    def _run_get_prediction_data_test(feature, expected_length):
        """Check types, length and labels of the x/y prediction data for one feature.

        :param feature: the FinancialFeature under test
        :param expected_length: number of rows expected in the x data
        """
        data_frame = financial_data_fixtures[feature.name]
        start_date = data_frame.index[0].date()
        end_date = data_frame.index[-1].date()
        market_open_list = sample_market_calendar.schedule(
            str(start_date), str(end_date)).market_open
        prediction_timestamp = market_open_list.iloc[20] + timedelta(minutes=30)
        target_timestamp = market_open_list.iloc[21] + timedelta(minutes=90)

        prediction_data_x = feature.get_prediction_features(
            data_frame, prediction_timestamp)
        prediction_data_y = feature.get_prediction_targets(
            data_frame, prediction_timestamp, target_timestamp)

        assert isinstance(prediction_data_x, pd.DataFrame) and isinstance(
            prediction_data_y, pd.Series)
        assert len(prediction_data_x) == expected_length
        # The x columns and the y index must carry the same labels.
        assert_array_equal(prediction_data_x.columns, prediction_data_y.index)

    def test_get_prediction_data(self):
        """Run the prediction-data check for every feature with its expected length."""
        expected_length_list = [15, 35, 69]
        for feature, expected_length in zip(self.feature_list, expected_length_list):
            self._run_get_prediction_data_test(feature, expected_length)

    def test_declassify_single_predict_y(self):
        """declassify_single_predict_y must raise NotImplementedError for every feature."""
        train_labels = np.stack(
            (TEST_ARRAY, TEST_ARRAY, TEST_ARRAY, TEST_ARRAY, TEST_ARRAY))
        predict_labels = {
            'open': train_labels[:, int(0.5 * train_labels.shape[1])]
        }
        for feature in self.feature_list:
            if feature.nbins:
                # Binned feature: an array with only the first entry set to 1.
                predict_y = np.zeros_like(predict_labels['open'])
                predict_y[0] = 1
            else:
                predict_y = predict_labels
            with pytest.raises(NotImplementedError):
                feature.declassify_single_predict_y(predict_y)
def setUp(self):
    """Create five 'close' features that differ mainly in their normalization.

    feature1..feature4 share one log-return config and use 'min_max',
    'standard', 'gaussian' and 'robust' normalization. feature5 uses a 'value'
    transformation with 'min_max' normalization, nbins=5 and
    classify_per_series=True.
    """
    transform_config = {'name': 'log-return'}
    # Keyword arguments shared by all five features.
    shared = dict(
        name='close',
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=35,
        normalise_per_series=True,
    )

    self.feature1 = FinancialFeature(
        transformation=transform_config, normalization='min_max', nbins=10, **shared)
    self.feature2 = FinancialFeature(
        transformation=transform_config, normalization='standard', nbins=10, **shared)
    self.feature3 = FinancialFeature(
        transformation=transform_config, normalization='gaussian', nbins=10, **shared)
    self.feature4 = FinancialFeature(
        transformation=transform_config, normalization='robust', nbins=10, **shared)

    transform_config_2 = {'name': 'value'}
    self.feature5 = FinancialFeature(
        transformation=transform_config_2,
        normalization='min_max',
        nbins=5,
        classify_per_series=True,
        **shared
    )
class TestFeatureNormalization(TestCase):
    """Tests for fitting/applying normalisation and classification on FinancialFeature."""

    def setUp(self):
        """Create five 'close' features that differ mainly in their normalization."""
        transform_config = {'name': 'log-return'}
        # Keyword arguments shared by all five features.
        shared = dict(
            name='close',
            ndays=5,
            resample_minutes=0,
            start_market_minute=90,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=35,
            normalise_per_series=True,
        )

        self.feature1 = FinancialFeature(
            transformation=transform_config, normalization='min_max', nbins=10, **shared)
        self.feature2 = FinancialFeature(
            transformation=transform_config, normalization='standard', nbins=10, **shared)
        self.feature3 = FinancialFeature(
            transformation=transform_config, normalization='gaussian', nbins=10, **shared)
        self.feature4 = FinancialFeature(
            transformation=transform_config, normalization='robust', nbins=10, **shared)

        transform_config_2 = {'name': 'value'}
        self.feature5 = FinancialFeature(
            transformation=transform_config_2,
            normalization='min_max',
            nbins=5,
            classify_per_series=True,
            **shared
        )

    def test_fit_normalisation(self):
        """After fitting, each scaler's statistics must match the sample's own."""
        sample = np.random.randn(10000)
        tolerance = 1e-4

        # min_max: fitted extremes equal the sample extremes.
        self.feature1.fit_normalisation(symbol_data=sample)
        min_max_scaler = self.feature1.scaler
        assert np.isclose(min_max_scaler.data_max_, sample.max(), rtol=tolerance)
        assert np.isclose(min_max_scaler.data_min_, sample.min(), rtol=tolerance)

        # standard: fitted mean and variance equal the sample's.
        self.feature2.fit_normalisation(symbol_data=sample)
        standard_scaler = self.feature2.scaler
        assert np.isclose(standard_scaler.mean_, sample.mean(), rtol=tolerance)
        assert np.isclose(standard_scaler.var_, sample.var(), rtol=tolerance)

        # gaussian: only the shapes of the fitted arrays are checked.
        self.feature3.fit_normalisation(symbol_data=sample)
        gaussian_scaler = self.feature3.scaler
        assert gaussian_scaler.references_.shape == (1000, )
        assert gaussian_scaler.quantiles_.shape == (1000, 1)

        # robust: fitted centre equals the sample median.
        self.feature4.fit_normalisation(symbol_data=sample)
        assert np.isclose(self.feature4.scaler.center_, np.median(sample), rtol=tolerance)

    def test_apply_normalisation(self):
        """Fit then apply each normaliser in turn, checking the resulting statistics.

        The return value of ``apply_normalisation`` is ignored; every assertion
        inspects ``data`` itself, and each step starts from whatever the
        previous step left in ``data``.
        """
        data = deepcopy(financial_data_fixtures['open'])

        def fit_then_apply(feature):
            # Fit one scaler per column, then apply to the whole frame.
            for column in data.columns:
                feature.fit_normalisation(symbol_data=data[column].values, symbol=column)
            feature.apply_normalisation(data)

        zeros = np.asarray([0., 0., 0., 0., 0.])
        ones = np.asarray([1., 1., 1., 1., 1.])

        fit_then_apply(self.feature1)
        np.testing.assert_allclose(data.max(), ones)
        np.testing.assert_allclose(data.min(), zeros)

        fit_then_apply(self.feature2)
        np.testing.assert_allclose(data.mean(), zeros, atol=1e-4)

        fit_then_apply(self.feature3)
        np.testing.assert_allclose(data.mean(), zeros, atol=1e-3)

        fit_then_apply(self.feature4)
        np.testing.assert_allclose(np.median(data, axis=0), zeros, atol=1e-3)

    def test_apply_classification(self):
        """apply_classification must produce the expected frame for the fitted symbols.

        Only 'SYM1' and 'SYM2' are fitted; the expected frame has just those
        two columns even though the input frame also has 'SYM3'.
        """
        symbols = ['SYM1', 'SYM2', 'SYM3']
        feature = self.feature5
        single_row = pd.DataFrame([[5, 5, 5]], columns=symbols)

        feature.fit_classification('SYM1', np.linspace(0, 10, 10))
        feature.fit_classification('SYM2', np.linspace(0, 100, 100))

        classified = feature.apply_classification(single_row)

        expected = pd.DataFrame(
            [[0., 1.], [0., 0.], [1., 0.], [0., 0.], [0., 0.]],
            columns=symbols[:2])
        assert classified.equals(expected)